You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ol...@apache.org on 2011/06/08 23:00:28 UTC

svn commit: r1133554 [1/5] - in /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm: ./ accessor/ assertion/ core/ exception/ lzx/

Author: oleg
Date: Wed Jun  8 21:00:27 2011
New Revision: 1133554

URL: http://svn.apache.org/viewvc?rev=1133554&view=rev
Log:
support of Java 5

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHM2XHTML.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHMDocumentInformation.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItspHeader.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcControlData.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmLzxcResetTable.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmgiHeader.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmPmglHeader.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/DirectoryListingEntry.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/assertion/ChmAssert.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmCommons.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmConstants.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmExtractor.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/core/ChmWrapper.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/exception/ChmParsingException.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmBlockInfo.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxBlock.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxState.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/lzx/ChmSection.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHM2XHTML.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHM2XHTML.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHM2XHTML.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHM2XHTML.java Wed Jun  8 21:00:27 2011
@@ -29,30 +29,30 @@ import org.xml.sax.SAXException;
  */
 public class CHM2XHTML {
 
-	public static void process(CHMDocumentInformation chmDoc,
-			ContentHandler handler) throws TikaException {
-		String text = chmDoc.getText();
-		try {
-			if (text.length() > 0) {
-				handler.characters(text.toCharArray(), 0, text.length());
-				new CHM2XHTML(chmDoc, handler);
-			} else
-				/* The error handling should be added */
-				System.err.println("Could not extract content");
+    public static void process(CHMDocumentInformation chmDoc,
+            ContentHandler handler) throws TikaException {
+        String text = chmDoc.getText();
+        try {
+            if (text.length() > 0) {
+                handler.characters(text.toCharArray(), 0, text.length());
+                new CHM2XHTML(chmDoc, handler);
+            } else
+                /* The error handling should be added */
+                System.err.println("Could not extract content");
 
-		} catch (SAXException e) {
-			// System.err.println(ChmParserUtils.getStackTrace(e.getStackTrace()));
-		}
-	}
+        } catch (SAXException e) {
+            // System.err.println(ChmParserUtils.getStackTrace(e.getStackTrace()));
+        }
+    }
 
-	protected String getText(CHMDocumentInformation chmDoc)
-			throws TikaException {
-		return chmDoc.getText();
-	}
+    protected String getText(CHMDocumentInformation chmDoc)
+            throws TikaException {
+        return chmDoc.getText();
+    }
 
-	protected TextContentHandler handler;
+    protected TextContentHandler handler;
 
-	public CHM2XHTML(CHMDocumentInformation chmDoc, ContentHandler handler) {
-		this.handler = new TextContentHandler(handler);
-	}
+    public CHM2XHTML(CHMDocumentInformation chmDoc, ContentHandler handler) {
+        this.handler = new TextContentHandler(handler);
+    }
 }

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHMDocumentInformation.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHMDocumentInformation.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHMDocumentInformation.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/CHMDocumentInformation.java Wed Jun  8 21:00:27 2011
@@ -37,163 +37,162 @@ import org.xml.sax.SAXException;
  * 
  */
 public class CHMDocumentInformation {
-	/* Class members */
-	private ChmExtractor chmExtractor = null;
+    /* Class members */
+    private ChmExtractor chmExtractor = null;
 
-	/**
-	 * Loads chm file as input stream and returns a new instance of chm doc info
-	 * 
-	 * @param is
-	 *            InputStream
-	 * 
-	 * @return chm document information
-	 */
-	public static CHMDocumentInformation load(InputStream is) {
-		return new CHMDocumentInformation().getInstance(is);
-	}
-
-	/**
-	 * Returns instance of chm document information
-	 * 
-	 * @param is
-	 *            InputStream
-	 * 
-	 * @return
-	 */
-	private CHMDocumentInformation getInstance(InputStream is) {
-		setChmExtractor(new ChmExtractor(is));
-		return this;
-	}
-
-	/**
-	 * Appends extracted data from chm listing entries
-	 * 
-	 * @return extracted content of chm
-	 */
-	private String getContent() {
-		StringBuilder sb = new StringBuilder();
-		DirectoryListingEntry entry;
-		for (Iterator<DirectoryListingEntry> it = getChmExtractor()
-				.getChmDirList().getDirectoryListingEntryList().iterator(); it
-				.hasNext();) {
-			try {
-				entry = it.next();
-				if (isRightEntry(entry)) {
-					byte[][] tmp = getChmExtractor().extractChmEntry(entry);
-					if (tmp != null) {
-						sb.append(extract(tmp));
-					}
-				}
-			} catch (ChmParsingException e) {// catch (IOException e) {
-				System.out.println(e.getMessage());
-			} // catch (IOException e) {//Pushback exception from tagsoup
-			// System.err.println(e.getMessage());
-		}
-		return sb.toString();
-	}
-
-	/**
-	 * Checks if an entry is a html or not.
-	 * 
-	 * @param entry
-	 *            chm directory listing entry
-	 * 
-	 * @return boolean
-	 */
-	private boolean isRightEntry(DirectoryListingEntry entry) {
-		return (entry.getName().endsWith(".html") || entry.getName().endsWith(
-				".htm"));
-	}
-
-	/**
-	 * Returns chm extractor
-	 * 
-	 * @return chmExtractor
-	 */
-	private ChmExtractor getChmExtractor() {
-		return chmExtractor;
-	}
-
-	/**
-	 * Sets a chm extractor
-	 * 
-	 * @param chmExtractor
-	 */
-	private void setChmExtractor(ChmExtractor chmExtractor) {
-		this.chmExtractor = chmExtractor;
-	}
-
-	/**
-	 * Returns chm metadata
-	 * 
-	 * @param metadata
-	 * 
-	 * @throws TikaException
-	 * @throws IOException
-	 */
-	public void getCHMDocInformation(Metadata metadata) throws TikaException,
-			IOException {
-		if (getChmExtractor() != null) {
-			/* Checking if file is a chm, done during creating chmItsf header */
-			metadata.add(Metadata.CONTENT_TYPE, "application/x-chm");
-		} else {
-			metadata.add(Metadata.CONTENT_TYPE, "unknown");
-		}
-	}
-
-	/**
-	 * Returns extracted text from chm file
-	 * 
-	 * @return text
-	 * 
-	 * @throws TikaException
-	 */
-	public String getText() throws TikaException {
-		return getContent();
-	}
-
-	/**
-	 * Extracts data from byte[][]
-	 * 
-	 * @param byteObject
-	 * @return
-	 * @throws IOException
-	 * @throws SAXException
-	 */
-	private String extract(byte[][] byteObject) {// throws IOException
-		StringBuilder wBuf = new StringBuilder();
-		InputStream stream = null;
-		Metadata metadata = new Metadata();
-		HtmlParser htmlParser = new HtmlParser();
-		BodyContentHandler handler = new BodyContentHandler(-1);// -1
-		ParseContext parser = new ParseContext();
-		try {
-			for (int i = 0; i < byteObject.length; i++) {
-				stream = new ByteArrayInputStream(byteObject[i]);
-				try {
-					htmlParser.parse(stream, handler, metadata, parser);
-				} catch (TikaException e) {
-					wBuf.append(new String(byteObject[i]));
-					System.err.println("\n"
-							+ CHMDocumentInformation.class.getName()
-							+ " extract " + e.getMessage());
-				} finally {
-					wBuf.append(handler.toString()
-							+ System.getProperty("line.separator"));
-					stream.close();
-				}
-			}
-		} catch (ChmParsingException e) {
-			System.err.println(e.getMessage());
-		} catch (SAXException e) {
-			System.err.println(e.getMessage());
-		} catch (IOException e) {// Pushback overflow from tagsoup
-		// System.err.println(e.getMessage());
-		}
-		return wBuf.toString();
-	}
+    /**
+     * Loads chm file as input stream and returns a new instance of chm doc info
+     * 
+     * @param is
+     *            InputStream
+     * 
+     * @return chm document information
+     */
+    public static CHMDocumentInformation load(InputStream is) {
+        return new CHMDocumentInformation().getInstance(is);
+    }
+
+    /**
+     * Returns instance of chm document information
+     * 
+     * @param is
+     *            InputStream
+     * 
+     * @return
+     */
+    private CHMDocumentInformation getInstance(InputStream is) {
+        setChmExtractor(new ChmExtractor(is));
+        return this;
+    }
+
+    /**
+     * Appends extracted data from chm listing entries
+     * 
+     * @return extracted content of chm
+     */
+    private String getContent() {
+        StringBuilder sb = new StringBuilder();
+        DirectoryListingEntry entry;
+        for (Iterator<DirectoryListingEntry> it = getChmExtractor()
+                .getChmDirList().getDirectoryListingEntryList().iterator(); it
+                .hasNext();) {
+            try {
+                entry = it.next();
+                if (isRightEntry(entry)) {
+                    byte[][] tmp = getChmExtractor().extractChmEntry(entry);
+                    if (tmp != null) {
+                        sb.append(extract(tmp));
+                    }
+                }
+            } catch (ChmParsingException e) {// catch (IOException e) {
+                System.out.println(e.getMessage());
+            } // catch (IOException e) {//Pushback exception from tagsoup
+            // System.err.println(e.getMessage());
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Checks whether an entry is an HTML file, i.e. its name ends with ".html" or ".htm".
+     * 
+     * @param entry
+     *            chm directory listing entry
+     * 
+     * @return boolean
+     */
+    private boolean isRightEntry(DirectoryListingEntry entry) {
+        return (entry.getName().endsWith(".html") || entry.getName().endsWith(".htm"));
+    }
+
+    /**
+     * Returns chm extractor
+     * 
+     * @return chmExtractor
+     */
+    private ChmExtractor getChmExtractor() {
+        return chmExtractor;
+    }
+
+    /**
+     * Sets a chm extractor
+     * 
+     * @param chmExtractor
+     */
+    private void setChmExtractor(ChmExtractor chmExtractor) {
+        this.chmExtractor = chmExtractor;
+    }
+
+    /**
+     * Adds the content type to the given metadata: "application/x-chm" when a
+     * chm extractor is set, "unknown" otherwise
+     * 
+     * @param metadata
+     * 
+     * @throws TikaException
+     * @throws IOException
+     */
+    public void getCHMDocInformation(Metadata metadata) throws TikaException,
+            IOException {
+        if (getChmExtractor() != null) {
+            /* The check that the file is a chm is done while creating the chmItsf header */
+            metadata.add(Metadata.CONTENT_TYPE, "application/x-chm");
+        } else {
+            metadata.add(Metadata.CONTENT_TYPE, "unknown");
+        }
+    }
+
+    /**
+     * Returns extracted text from chm file
+     * 
+     * @return text
+     * 
+     * @throws TikaException
+     */
+    public String getText() throws TikaException {
+        return getContent();
+    }
+
+    /**
+     * Extracts data from byte[][]
+     * 
+     * @param byteObject
+     * @return
+     * @throws IOException
+     * @throws SAXException
+     */
+    private String extract(byte[][] byteObject) {// throws IOException
+        StringBuilder wBuf = new StringBuilder();
+        InputStream stream = null;
+        Metadata metadata = new Metadata();
+        HtmlParser htmlParser = new HtmlParser();
+        BodyContentHandler handler = new BodyContentHandler(-1);// -1
+        ParseContext parser = new ParseContext();
+        try {
+            for (int i = 0; i < byteObject.length; i++) {
+                stream = new ByteArrayInputStream(byteObject[i]);
+                try {
+                    htmlParser.parse(stream, handler, metadata, parser);
+                } catch (TikaException e) {
+                    wBuf.append(new String(byteObject[i]));
+                    System.err.println("\n"
+                            + CHMDocumentInformation.class.getName()
+                            + " extract " + e.getMessage());
+                } finally {
+                    wBuf.append(handler.toString()
+                            + System.getProperty("line.separator"));
+                    stream.close();
+                }
+            }
+        } catch (ChmParsingException e) {
+            System.err.println(e.getMessage());
+        } catch (SAXException e) {
+            System.err.println(e.getMessage());
+        } catch (IOException e) {// Pushback overflow from tagsoup
+        // System.err.println(e.getMessage());
+        }
+        return wBuf.toString();
+    }
 
-	public static void main(String[] args) {
+    public static void main(String[] args) {
 
-	}
+    }
 }

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java Wed Jun  8 21:00:27 2011
@@ -31,25 +31,26 @@ import org.xml.sax.SAXException;
 
 public class ChmParser extends AbstractParser {
 
-	private static final long serialVersionUID = 5938777307516469802L;
-	private static final Set<MediaType> SUPPORTED_TYPES = Collections
-			.singleton(MediaType.application("chm"));
-
-	public Set<MediaType> getSupportedTypes(ParseContext context) {
-		return SUPPORTED_TYPES;
-	}
-	
-	public void parse(InputStream stream, ContentHandler handler,
-			Metadata metadata, ParseContext context) throws IOException,
-			SAXException, TikaException {
-		CHMDocumentInformation chmInfo = CHMDocumentInformation.load(stream);
-		metadata.set(Metadata.CONTENT_TYPE, "chm");
-		extractMetadata(chmInfo, metadata);
-		CHM2XHTML.process(chmInfo, handler);
-	}
-
-	private void extractMetadata(CHMDocumentInformation chmInfo,
-			Metadata metadata) throws TikaException, IOException {
-		chmInfo.getCHMDocInformation(metadata);
-	}
+    private static final long serialVersionUID = 5938777307516469802L;
+    private static final Set<MediaType> SUPPORTED_TYPES = Collections
+            .singleton(MediaType.application("chm"));
+
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+
+    public void parse(InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context) throws IOException,
+            SAXException, TikaException {
+        CHMDocumentInformation chmInfo = CHMDocumentInformation.load(stream);
+        metadata.set(Metadata.CONTENT_TYPE, "chm");
+        extractMetadata(chmInfo, metadata);
+        CHM2XHTML.process(chmInfo, handler);
+    }
+
+    private void extractMetadata(CHMDocumentInformation chmInfo,
+            Metadata metadata) throws TikaException, IOException {
+        chmInfo.getCHMDocInformation(metadata);
+    }
 }

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmAccessor.java Wed Jun  8 21:00:27 2011
@@ -25,12 +25,12 @@ import java.io.Serializable;
  * @param <T>
  */
 public interface ChmAccessor<T> extends Serializable {
-	/**
-	 * Parses chm accessor
-	 * 
-	 * @param data
-	 *            chm file
-	 * @param chmAccessor
-	 */
-	void parse(byte[] data, T chmAccessor);
+    /**
+     * Parses chm accessor
+     * 
+     * @param data
+     *            chm file
+     * @param chmAccessor
+     */
+    void parse(byte[] data, T chmAccessor);
 }

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmDirectoryListingSet.java Wed Jun  8 21:00:27 2011
@@ -18,7 +18,6 @@ package org.apache.tika.parser.chm.acces
 
 import java.math.BigInteger;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 
 import org.apache.tika.parser.chm.core.ChmCommons;
@@ -28,359 +27,369 @@ import org.apache.tika.parser.chm.core.C
  * Holds chm listing entries
  */
 public class ChmDirectoryListingSet {
-	private List<DirectoryListingEntry> dlel;
-	private byte[] data;
-	private int placeHolder = -1;
-	private long dataOffset = -1;
-	private int controlDataIndex = -1;
-	private int resetTableIndex = -1;
-
-	private boolean isNotControlDataFound = true;
-	private boolean isNotResetTableFound = true;
-
-	/**
-	 * Constructs chm directory listing set
-	 * 
-	 * @param data
-	 *            byte[]
-	 * @param chmItsHeader
-	 * @param chmItspHeader
-	 */
-	public ChmDirectoryListingSet(byte[] data, ChmItsfHeader chmItsHeader,
-			ChmItspHeader chmItspHeader) {
-		setDirectoryListingEntryList(new ArrayList<DirectoryListingEntry>());
-		ChmCommons.assertByteArrayNotNull(data);
-		setData(data);
-		enumerateChmDirectoryListingList(chmItsHeader, chmItspHeader);
-	}
-
-	public String toString() {
-		StringBuilder sb = new StringBuilder();
-		sb.append("list:=" + getDirectoryListingEntryList().toString()
-				+ System.getProperty("line.separator"));
-		sb.append("number of list items:="
-				+ getDirectoryListingEntryList().size());
-		return sb.toString();
-	}
-
-	/**
-	 * Returns control data index that located in List
-	 * 
-	 * @return control data index
-	 */
-	public int getControlDataIndex() {
-		return controlDataIndex;
-	}
-
-	/**
-	 * Sets control data index
-	 * 
-	 * @param controlDataIndex
-	 */
-	protected void setControlDataIndex(int controlDataIndex) {
-		this.controlDataIndex = controlDataIndex;
-	}
-
-	/**
-	 * Return index of reset table
-	 * 
-	 * @return reset table index
-	 */
-	public int getResetTableIndex() {
-		return resetTableIndex;
-	}
-
-	/**
-	 * Sets reset table index
-	 * 
-	 * @param resetTableIndex
-	 */
-	protected void setResetTableIndex(int resetTableIndex) {
-		this.resetTableIndex = resetTableIndex;
-	}
-
-	/**
-	 * Gets place holder
-	 * 
-	 * @return place holder
-	 */
-	private int getPlaceHolder() {
-		return placeHolder;
-	}
-
-	/**
-	 * Sets place holder
-	 * 
-	 * @param placeHolder
-	 */
-	private void setPlaceHolder(int placeHolder) {
-		this.placeHolder = placeHolder;
-	}
-
-	/**
-	 * Enumerates chm directory listing entries
-	 * 
-	 * @param chmItsHeader
-	 *            chm itsf header
-	 * @param chmItspHeader
-	 *            chm itsp header
-	 */
-	private void enumerateChmDirectoryListingList(ChmItsfHeader chmItsHeader,
-			ChmItspHeader chmItspHeader) {
-		try {
-			int startPmgl = chmItspHeader.getIndex_head();
-			int stopPmgl = chmItspHeader.getUnknown_0024();
-			int dir_offset = (int) (chmItsHeader.getDirOffset() + chmItspHeader
-					.getHeader_len());
-			setDataOffset(chmItsHeader.getDataOffset());
-
-			/* loops over all pmgls */
-			int previous_index = 0;
-			byte[] dir_chunk = null;
-			for (int i = startPmgl; i <= stopPmgl; i++) {
-				int data_copied = ((1 + i) * (int) chmItspHeader.getBlock_len())
-						+ dir_offset;
-				if (i == 0) {
-					dir_chunk = new byte[(int) chmItspHeader.getBlock_len()];
-					dir_chunk = Arrays
-							.copyOfRange(getData(), dir_offset,
-									(((1 + i) * (int) chmItspHeader
-											.getBlock_len()) + dir_offset));
-					previous_index = data_copied;
-				} else {
-					dir_chunk = new byte[(int) chmItspHeader.getBlock_len()];
-					dir_chunk = Arrays
-							.copyOfRange(getData(), previous_index,
-									(((1 + i) * (int) chmItspHeader
-											.getBlock_len()) + dir_offset));
-					previous_index = data_copied;
-				}
-				enumerateOneSegment(dir_chunk);
-				dir_chunk = null;
-			}
-		} catch (Exception e) {
-			e.printStackTrace();
-		} finally {
-			setData(null);
-		}
-	}
-
-	/**
-	 * Checks control data
-	 * 
-	 * @param dle
-	 *            chm directory listing entry
-	 */
-	private void checkControlData(DirectoryListingEntry dle) {
-		if (isNotControlDataFound) {
-			if (dle.getName().contains(ChmConstants.CONTROL_DATA)) {
-				setControlDataIndex(getDirectoryListingEntryList().size());
-				isNotControlDataFound = false;
-			}
-		}
-	}
-
-	/**
-	 * Checks reset table
-	 * 
-	 * @param dle
-	 *            chm directory listing entry
-	 */
-	private void checkResetTable(DirectoryListingEntry dle) {
-		if (isNotResetTableFound) {
-			if (dle.getName().contains(ChmConstants.RESET_TABLE)) {
-				setResetTableIndex(getDirectoryListingEntryList().size());
-				isNotResetTableFound = false;
-			}
-		}
-	}
-
-	/**
-	 * Enumerates chm directory listing entries in single chm segment
-	 * 
-	 * @param dir_chunk
-	 */
-	private void enumerateOneSegment(byte[] dir_chunk) {
-		try {
-			if (dir_chunk != null) {
-
-				int indexWorkData = ChmCommons.indexOf(dir_chunk,
-						"::".getBytes());
-				int indexUserData = ChmCommons.indexOf(dir_chunk,
-						"/".getBytes());
-
-				if (indexUserData < indexWorkData)
-					setPlaceHolder(indexUserData);
-				else
-					setPlaceHolder(indexWorkData);
-
-				if (getPlaceHolder() > 0
-						&& dir_chunk[getPlaceHolder() - 1] != 115) {// #{
-					do {
-						if (dir_chunk[getPlaceHolder() - 1] > 0) {
-							DirectoryListingEntry dle = new DirectoryListingEntry();
-
-							// two cases: 1. when dir_chunk[getPlaceHolder() -
-							// 1] == 0x73
-							// 2. when dir_chunk[getPlaceHolder() + 1] == 0x2f
-							doNameCheck(dir_chunk, dle);
-
-							dle.setName(new String(Arrays.copyOfRange(
-									dir_chunk, getPlaceHolder(),
-									(getPlaceHolder() + dle.getNameLength()))));
-							checkControlData(dle);
-							checkResetTable(dle);
-							setPlaceHolder(getPlaceHolder()
-									+ dle.getNameLength());
-
-							/* Sets entry type */
-							if (getPlaceHolder() < dir_chunk.length
-									&& dir_chunk[getPlaceHolder()] == 0)
-								dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED);
-							else
-								dle.setEntryType(ChmCommons.EntryType.COMPRESSED);
-
-							setPlaceHolder(getPlaceHolder() + 1);
-							dle.setOffset(getEncint(dir_chunk));
-							dle.setLength(getEncint(dir_chunk));
-							getDirectoryListingEntryList().add(dle);
-						} else
-							setPlaceHolder(getPlaceHolder() + 1);
-
-					} while (hasNext(dir_chunk));
-				}
-			}
-
-		} catch (Exception e) {
-			e.printStackTrace();
-		}
-	}
-
-	/**
-	 * Checks if a name and name length are correct. If not then handles it as
-	 * follows: 1. when dir_chunk[getPlaceHolder() - 1] == 0x73 ('/') 2. when
-	 * dir_chunk[getPlaceHolder() + 1] == 0x2f ('s')
-	 * 
-	 * @param dir_chunk
-	 * @param dle
-	 */
-	private void doNameCheck(byte[] dir_chunk, DirectoryListingEntry dle) {
-		if (dir_chunk[getPlaceHolder() - 1] == 0x73) {
-			dle.setNameLength(dir_chunk[getPlaceHolder() - 1] & 0x21);
-		} else if (dir_chunk[getPlaceHolder() + 1] == 0x2f) {
-			dle.setNameLength(dir_chunk[getPlaceHolder()]);
-			setPlaceHolder(getPlaceHolder() + 1);
-		} else {
-			dle.setNameLength(dir_chunk[getPlaceHolder() - 1]);
-		}
-	}
-
-	/**
-	 * Checks if it's possible move further on byte[]
-	 * 
-	 * @param dir_chunk
-	 * 
-	 * @return boolean
-	 */
-	private boolean hasNext(byte[] dir_chunk) {
-		while (getPlaceHolder() < dir_chunk.length) {
-			if (dir_chunk[getPlaceHolder()] == 47
-					&& dir_chunk[getPlaceHolder() + 1] != ':') {
-				setPlaceHolder(getPlaceHolder());
-				return true;
-			} else if (dir_chunk[getPlaceHolder()] == ':'
-					&& dir_chunk[getPlaceHolder() + 1] == ':') {
-				setPlaceHolder(getPlaceHolder());
-				return true;
-			} else
-				setPlaceHolder(getPlaceHolder() + 1);
-		}
-		return false;
-	}
-
-	/**
-	 * Returns encrypted integer
-	 * 
-	 * @param data_chunk
-	 * 
-	 * @return
-	 */
-	private int getEncint(byte[] data_chunk) {
-		byte ob;
-		BigInteger bi = BigInteger.ZERO;
-		byte[] nb = new byte[1];
-
-		if (getPlaceHolder() < data_chunk.length) {
-			while ((ob = data_chunk[getPlaceHolder()]) < 0) {
-				nb[0] = (byte) ((ob & 0x7f));
-				bi = bi.shiftLeft(7).add(new BigInteger(nb));
-				setPlaceHolder(getPlaceHolder() + 1);
-			}
-			nb[0] = (byte) ((ob & 0x7f));
-			bi = bi.shiftLeft(7).add(new BigInteger(nb));
-			setPlaceHolder(getPlaceHolder() + 1);
-		}
-		return bi.intValue();
-	}
-
-	/**
-	 * @param args
-	 */
-	public static void main(String[] args) {
-	}
-
-	/**
-	 * Sets chm directory listing entry list
-	 * 
-	 * @param dlel
-	 *            chm directory listing entry list
-	 */
-	public void setDirectoryListingEntryList(List<DirectoryListingEntry> dlel) {
-		this.dlel = dlel;
-	}
-
-	/**
-	 * Returns chm directory listing entry list
-	 * 
-	 * @return List<DirectoryListingEntry>
-	 */
-	public List<DirectoryListingEntry> getDirectoryListingEntryList() {
-		return dlel;
-	}
-
-	/**
-	 * Sets data
-	 * 
-	 * @param data
-	 */
-	private void setData(byte[] data) {
-		this.data = data;
-	}
-
-	/**
-	 * Returns data
-	 * 
-	 * @return
-	 */
-	private byte[] getData() {
-		return data;
-	}
-
-	/**
-	 * Sets data offset
-	 * 
-	 * @param dataOffset
-	 */
-	private void setDataOffset(long dataOffset) {
-		this.dataOffset = dataOffset;
-	}
-
-	/**
-	 * Returns data offset
-	 * 
-	 * @return dataOffset
-	 */
-	public long getDataOffset() {
-		return dataOffset;
-	}
+    private List<DirectoryListingEntry> dlel;
+    private byte[] data;
+    private int placeHolder = -1;
+    private long dataOffset = -1;
+    private int controlDataIndex = -1;
+    private int resetTableIndex = -1;
+
+    private boolean isNotControlDataFound = true;
+    private boolean isNotResetTableFound = true;
+
+    /**
+     * Constructs chm directory listing set
+     * 
+     * @param data
+     *            byte[]
+     * @param chmItsHeader
+     * @param chmItspHeader
+     */
+    public ChmDirectoryListingSet(byte[] data, ChmItsfHeader chmItsHeader,
+            ChmItspHeader chmItspHeader) {
+        setDirectoryListingEntryList(new ArrayList<DirectoryListingEntry>());
+        ChmCommons.assertByteArrayNotNull(data);
+        setData(data);
+        enumerateChmDirectoryListingList(chmItsHeader, chmItspHeader);
+    }
+
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("list:=" + getDirectoryListingEntryList().toString()
+                + System.getProperty("line.separator"));
+        sb.append("number of list items:="
+                + getDirectoryListingEntryList().size());
+        return sb.toString();
+    }
+
+    /**
+     * Returns the index of the control data entry within the directory listing entry list
+     * 
+     * @return control data index
+     */
+    public int getControlDataIndex() {
+        return controlDataIndex;
+    }
+
+    /**
+     * Sets control data index
+     * 
+     * @param controlDataIndex
+     */
+    protected void setControlDataIndex(int controlDataIndex) {
+        this.controlDataIndex = controlDataIndex;
+    }
+
+    /**
+     * Returns the index of the reset table
+     * 
+     * @return reset table index
+     */
+    public int getResetTableIndex() {
+        return resetTableIndex;
+    }
+
+    /**
+     * Sets reset table index
+     * 
+     * @param resetTableIndex
+     */
+    protected void setResetTableIndex(int resetTableIndex) {
+        this.resetTableIndex = resetTableIndex;
+    }
+
+    /**
+     * Gets place holder
+     * 
+     * @return place holder
+     */
+    private int getPlaceHolder() {
+        return placeHolder;
+    }
+
+    /**
+     * Sets place holder
+     * 
+     * @param placeHolder
+     */
+    private void setPlaceHolder(int placeHolder) {
+        this.placeHolder = placeHolder;
+    }
+
+    /**
+     * Enumerates chm directory listing entries: every directory chunk from
+     * the itsp index head up to and including unknown_0024 is copied out of
+     * the raw data and handed to enumerateOneSegment.
+     * <p>
+     * Chunk number i is read from dir_offset + i * block_len up to (but not
+     * including) dir_offset + (i + 1) * block_len, where dir_offset is the
+     * itsf directory offset plus the itsp header length. The start is derived
+     * from the chunk number itself, so a first chunk number other than 0 is
+     * located correctly as well.
+     * <p>
+     * An exception is reported with a stack trace and ends the enumeration;
+     * the raw data reference is cleared in every case.
+     * 
+     * @param chmItsHeader
+     *            chm itsf header
+     * @param chmItspHeader
+     *            chm itsp header
+     */
+    private void enumerateChmDirectoryListingList(ChmItsfHeader chmItsHeader,
+            ChmItspHeader chmItspHeader) {
+        try {
+            int startPmgl = chmItspHeader.getIndex_head();
+            int stopPmgl = chmItspHeader.getUnknown_0024();
+            int dir_offset = (int) (chmItsHeader.getDirOffset() + chmItspHeader
+                    .getHeader_len());
+            setDataOffset(chmItsHeader.getDataOffset());
+
+            /* loops over all pmgls */
+            for (int i = startPmgl; i <= stopPmgl; i++) {
+                int block_len = (int) chmItspHeader.getBlock_len();
+
+                /*
+                 * Do not resume from the end of the previous iteration: on the
+                 * first pass there is none, and a start of 0 would be wrong
+                 * whenever startPmgl is not 0.
+                 */
+                int chunk_start = dir_offset + (i * block_len);
+                int chunk_end = chunk_start + block_len;
+
+                /* copyOfRange returns the chunk array; no pre-allocation needed */
+                byte[] dir_chunk = ChmCommons.copyOfRange(getData(),
+                        chunk_start, chunk_end);
+                enumerateOneSegment(dir_chunk);
+            }
+        } catch (Exception e) {
+            /* best effort: entries collected so far are kept */
+            e.printStackTrace();
+        } finally {
+            /* drop the raw bytes whether or not the enumeration succeeded */
+            setData(null);
+        }
+    }
+
+    /**
+     * Remembers the list position of the first entry whose name contains
+     * ChmConstants.CONTROL_DATA; once found, later entries are not examined.
+     * 
+     * @param dle chm directory listing entry, checked before it is added
+     */
+    private void checkControlData(DirectoryListingEntry dle) {
+        if (!isNotControlDataFound
+                || !dle.getName().contains(ChmConstants.CONTROL_DATA)) {
+            return;
+        }
+        setControlDataIndex(getDirectoryListingEntryList().size());
+        isNotControlDataFound = false;
+    }
+
+    /**
+     * Remembers the list position of the first entry whose name contains
+     * ChmConstants.RESET_TABLE; once found, later entries are not examined.
+     * 
+     * @param dle chm directory listing entry, checked before it is added
+     */
+    private void checkResetTable(DirectoryListingEntry dle) {
+        if (!isNotResetTableFound
+                || !dle.getName().contains(ChmConstants.RESET_TABLE)) {
+            return;
+        }
+        setResetTableIndex(getDirectoryListingEntryList().size());
+        isNotResetTableFound = false;
+    }
+
+    /**
+     * Enumerates chm directory listing entries in a single chm segment and
+     * adds them to the entry list; an exception ends the segment early.
+     * @param dir_chunk one directory chunk; a null chunk is skipped
+     */
+    private void enumerateOneSegment(byte[] dir_chunk) {
+        try {
+            if (dir_chunk != null) {
+
+                int indexWorkData = ChmCommons.indexOf(dir_chunk,
+                        "::".getBytes());
+                int indexUserData = ChmCommons.indexOf(dir_chunk,
+                        "/".getBytes());
+                // the smaller index wins (NOTE(review): also when it means "not found"?)
+                if (indexUserData < indexWorkData)
+                    setPlaceHolder(indexUserData);
+                else
+                    setPlaceHolder(indexWorkData);
+
+                if (getPlaceHolder() > 0
+                        && dir_chunk[getPlaceHolder() - 1] != 115) {// 115 is 's' (0x73)
+                    do {
+                        if (dir_chunk[getPlaceHolder() - 1] > 0) {
+                            DirectoryListingEntry dle = new DirectoryListingEntry();
+
+                            // two cases: 1. when dir_chunk[getPlaceHolder() -
+                            // 1] == 0x73
+                            // 2. when dir_chunk[getPlaceHolder() + 1] == 0x2f
+                            doNameCheck(dir_chunk, dle);
+
+                            // The name is the nameLength bytes that start at
+                            // the place holder. NOTE(review): new String(byte[])
+                            // decodes with the platform default charset -
+                            // confirm that this is intended for entry names.
+                            dle.setName(new String(ChmCommons.copyOfRange(
+                                    dir_chunk, getPlaceHolder(),
+                                    (getPlaceHolder() + dle.getNameLength()))));
+                            checkControlData(dle);
+                            checkResetTable(dle);
+                            setPlaceHolder(getPlaceHolder()
+                                    + dle.getNameLength());
+
+                            /* Sets entry type: a 0 byte after the name means UNCOMPRESSED */
+                            if (getPlaceHolder() < dir_chunk.length
+                                    && dir_chunk[getPlaceHolder()] == 0)
+                                dle.setEntryType(ChmCommons.EntryType.UNCOMPRESSED);
+                            else
+                                dle.setEntryType(ChmCommons.EntryType.COMPRESSED);
+
+                            setPlaceHolder(getPlaceHolder() + 1);
+                            dle.setOffset(getEncint(dir_chunk));
+                            dle.setLength(getEncint(dir_chunk));
+                            getDirectoryListingEntryList().add(dle);
+                        } else
+                            setPlaceHolder(getPlaceHolder() + 1);
+
+                    } while (hasNext(dir_chunk));
+                }
+            }
+
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Sets the name length of dle from the bytes around the place holder:
+     * 1. byte before it is 0x73 ('s'): length is 0x73 &amp; 0x21; 2. byte after
+     * it is 0x2f ('/'): length is the byte at the place holder, which then
+     * advances by one; 3. otherwise: length is the byte before it.
+     * @param dir_chunk directory chunk being scanned
+     * @param dle entry whose name length is set
+     */
+    private void doNameCheck(byte[] dir_chunk, DirectoryListingEntry dle) {
+        if (dir_chunk[getPlaceHolder() - 1] == 0x73) {
+            dle.setNameLength(dir_chunk[getPlaceHolder() - 1] & 0x21);
+        } else if (dir_chunk[getPlaceHolder() + 1] == 0x2f) {
+            dle.setNameLength(dir_chunk[getPlaceHolder()]);
+            setPlaceHolder(getPlaceHolder() + 1);
+        } else {
+            dle.setNameLength(dir_chunk[getPlaceHolder() - 1]);
+        }
+    }
+
+    /**
+     * Moves the place holder forward to the next entry marker in the chunk: a
+     * '/' that is not followed by ':', or the two bytes "::". A marker needs a
+     * following byte, so the last byte of the chunk is never inspected as one
+     * and nothing is read past the end of the array.
+     * 
+     * @param dir_chunk directory chunk being scanned
+     * @return true if a marker was found (the place holder is left on it),
+     *         false if the chunk holds no further marker
+     */
+    private boolean hasNext(byte[] dir_chunk) {
+        while (getPlaceHolder() + 1 < dir_chunk.length) {
+            byte current = dir_chunk[getPlaceHolder()];
+            byte following = dir_chunk[getPlaceHolder() + 1];
+            if ((current == '/' && following != ':')
+                    || (current == ':' && following == ':')) {
+                return true;
+            }
+            setPlaceHolder(getPlaceHolder() + 1);
+        }
+        return false;
+    }
+
+    /**
+     * Returns the variable-length encoded integer found at the place holder
+     * and moves the place holder past it.
+     * <p>
+     * Every byte contributes its low 7 bits, most significant group first; a
+     * byte with the high bit set (negative as a Java byte) announces another
+     * byte. Plain int arithmetic is used, which keeps the low 32 bits.
+     * 
+     * @param data_chunk chunk to read from
+     * @return the decoded value, or 0 when the place holder is already at or
+     *         beyond the end of the chunk
+     */
+    private int getEncint(byte[] data_chunk) {
+        int value = 0;
+        if (getPlaceHolder() < data_chunk.length) {
+            byte ob;
+            do {
+                ob = data_chunk[getPlaceHolder()];
+                value = (value << 7) | (ob & 0x7f);
+                setPlaceHolder(getPlaceHolder() + 1);
+            } while (ob < 0);
+        }
+        return value;
+    }
+
+    /**
+     * @param args ignored; this entry point has an empty body
+     */
+    public static void main(String[] args) {
+    }
+
+    /**
+     * Replaces the chm directory listing entry list
+     * 
+     * @param dlel
+     *            chm directory listing entry list; stored as is, not copied
+     */
+    public void setDirectoryListingEntryList(List<DirectoryListingEntry> dlel) {
+        this.dlel = dlel;
+    }
+
+    /**
+     * Returns chm directory listing entry list
+     * 
+     * @return the internal list of entries itself, not a copy
+     */
+    public List<DirectoryListingEntry> getDirectoryListingEntryList() {
+        return dlel;
+    }
+
+    /**
+     * Sets the raw bytes that directory chunks are copied from
+     * 
+     * @param data raw bytes, or null to drop the reference
+     */
+    private void setData(byte[] data) {
+        this.data = data;
+    }
+
+    /**
+     * Returns the raw bytes that directory chunks are copied from
+     * 
+     * @return the stored array itself (not a copy); may be null
+     */
+    private byte[] getData() {
+        return data;
+    }
+
+    /**
+     * Sets data offset
+     * 
+     * @param dataOffset data offset, taken from the itsf header by the caller
+     */
+    private void setDataOffset(long dataOffset) {
+        this.dataOffset = dataOffset;
+    }
+
+    /**
+     * Returns the data offset recorded while the listing was enumerated
+     * 
+     * @return dataOffset
+     */
+    public long getDataOffset() {
+        return dataOffset;
+    }
 }

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java?rev=1133554&r1=1133553&r2=1133554&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/accessor/ChmItsfHeader.java Wed Jun  8 21:00:27 2011
@@ -39,459 +39,459 @@ import org.apache.tika.parser.chm.except
  */
 /* structure of ITSF headers */
 public class ChmItsfHeader implements ChmAccessor<ChmItsfHeader> {
-	private static final long serialVersionUID = 2215291838533213826L;
-	private byte[] signature = new String("ITSF").getBytes(); /* 0 (ITSF) */
-	private int version; /* 4 */
-	private int header_len; /* 8 */
-	private int unknown_000c; /* c */
-	private long last_modified; /* 10 */
-	private long lang_id; /* 14 */
-	private byte[] dir_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 18 */
-	private byte[] stream_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 28 */
-	private long unknown_offset; /* 38 */
-	private long unknown_len; /* 40 */
-	private long dir_offset; /* 48 */
-	private long dir_len; /* 50 */
-	private long data_offset; /* 58 (Not present before V3) */
-
-	/* local usage */
-	private int dataRemained;
-	private int currentPlace = 0;
-
-	/**
-	 * Prints the values of ChmfHeader
-	 */
-	public String toString() {
-		StringBuilder sb = new StringBuilder();
-		sb.append(new String(getSignature()) + " ");
-		sb.append(getVersion() + " ");
-		sb.append(getHeaderLen() + " ");
-		sb.append(getUnknown_000c() + " ");
-		sb.append(getLastModified() + " ");
-		sb.append(getLangId() + " ");
-		sb.append(getDir_uuid() + " ");
-		sb.append(getStream_uuid() + " ");
-		sb.append(getUnknownOffset() + " ");
-		sb.append(getUnknownLen() + " ");
-		sb.append(getDirOffset() + " ");
-		sb.append(getDirLen() + " ");
-		sb.append(getDataOffset() + " ");
-		return sb.toString();
-	}
-
-	/**
-	 * Returns a signature of itsf header
-	 * 
-	 * @return itsf header
-	 */
-	public byte[] getSignature() {
-		return signature;
-	}
-
-	/**
-	 * Sets itsf header signature
-	 * 
-	 * @param signature
-	 */
-	protected void setSignature(byte[] signature) {
-		this.signature = signature;
-	}
-
-	/**
-	 * Returns itsf header version
-	 * 
-	 * @return itsf version
-	 */
-	public int getVersion() {
-		return version;
-	}
-
-	/**
-	 * Sets itsf version
-	 * 
-	 * @param version
-	 */
-	protected void setVersion(int version) {
-		this.version = version;
-	}
-
-	/**
-	 * Returns itsf header length
-	 * 
-	 * @return length
-	 */
-	public int getHeaderLen() {
-		return header_len;
-	}
-
-	/**
-	 * Sets itsf header length
-	 * 
-	 * @param header_len
-	 */
-	protected void setHeaderLen(int header_len) {
-		this.header_len = header_len;
-	}
-
-	/**
-	 * Returns unknown_00c value
-	 * 
-	 * @return unknown_00c
-	 */
-	public int getUnknown_000c() {
-		return unknown_000c;
-	}
-
-	/**
-	 * Sets unknown_00c
-	 * 
-	 * @param unknown_000c
-	 */
-	protected void setUnknown_000c(int unknown_000c) {
-		this.unknown_000c = unknown_000c;
-	}
-
-	/**
-	 * Returns last modified date of the chm file
-	 * 
-	 * @return last modified date as long
-	 */
-	public long getLastModified() {
-		return last_modified;
-	}
-
-	/**
-	 * Sets last modified date of the chm file
-	 * 
-	 * @param last_modified
-	 */
-	protected void setLastModified(long last_modified) {
-		this.last_modified = last_modified;
-	}
-
-	/**
-	 * Returns language ID
-	 * 
-	 * @return language_id
-	 */
-	public long getLangId() {
-		return lang_id;
-	}
-
-	/**
-	 * Sets language_id
-	 * 
-	 * @param lang_id
-	 */
-	protected void setLangId(long lang_id) {
-		this.lang_id = lang_id;
-	}
-
-	/**
-	 * Returns directory uuid
-	 * 
-	 * @return dir_uuid
-	 */
-	public byte[] getDir_uuid() {
-		return dir_uuid;
-	}
-
-	/**
-	 * Sets directory uuid
-	 * 
-	 * @param dir_uuid
-	 */
-	protected void setDir_uuid(byte[] dir_uuid) {
-		this.dir_uuid = dir_uuid;
-	}
-
-	/**
-	 * Returns stream uuid
-	 * 
-	 * @return stream_uuid
-	 */
-	public byte[] getStream_uuid() {
-		return stream_uuid;
-	}
-
-	/**
-	 * Sets stream uuid
-	 * 
-	 * @param stream_uuid
-	 */
-	protected void setStream_uuid(byte[] stream_uuid) {
-		this.stream_uuid = stream_uuid;
-	}
-
-	/**
-	 * Returns unknown offset
-	 * 
-	 * @return unknown_offset
-	 */
-	public long getUnknownOffset() {
-		return unknown_offset;
-	}
-
-	/**
-	 * Sets unknown offset
-	 * 
-	 * @param unknown_offset
-	 */
-	protected void setUnknownOffset(long unknown_offset) {
-		this.unknown_offset = unknown_offset;
-	}
-
-	/**
-	 * Returns unknown length
-	 * 
-	 * @return unknown_length
-	 */
-	public long getUnknownLen() {
-		return unknown_len;
-	}
-
-	/**
-	 * Sets unknown length
-	 * 
-	 * @param unknown_len
-	 */
-	protected void setUnknownLen(long unknown_len) {
-		this.unknown_len = unknown_len;
-	}
-
-	/**
-	 * Returns directory offset
-	 * 
-	 * @return directory_offset
-	 */
-	public long getDirOffset() {
-		return dir_offset;
-	}
-
-	/**
-	 * Sets directory offset
-	 * 
-	 * @param dir_offset
-	 */
-	protected void setDirOffset(long dir_offset) {
-		this.dir_offset = dir_offset;
-	}
-
-	/**
-	 * Returns directory length
-	 * 
-	 * @return directory_offset
-	 */
-	public long getDirLen() {
-		return dir_len;
-	}
-
-	/**
-	 * Sets directory length
-	 * 
-	 * @param dir_len
-	 */
-	protected void setDirLen(long dir_len) {
-		this.dir_len = dir_len;
-	}
-
-	/**
-	 * Returns data offset
-	 * 
-	 * @return data_offset
-	 */
-	public long getDataOffset() {
-		return data_offset;
-	}
-
-	/**
-	 * Sets data offset
-	 * 
-	 * @param data_offset
-	 */
-	protected void setDataOffset(long data_offset) {
-		this.data_offset = data_offset;
-	}
-
-	/**
-	 * Copies 4 first bytes of the byte[]
-	 * 
-	 * @param data
-	 * @param chmItsfHeader
-	 * @param count
-	 */
-	private void unmarshalCharArray(byte[] data, ChmItsfHeader chmItsfHeader,
-			int count) {
-		ChmAssert.assertChmAccessorParameters(data, chmItsfHeader, count);
-		System.arraycopy(data, 0, chmItsfHeader.signature, 0, count);
-		this.setCurrentPlace(this.getCurrentPlace() + count);
-		this.setDataRemained(this.getDataRemained() - count);
-	}
-
-	/**
-	 * Copies X bytes of source byte[] to the dest byte[]
-	 * 
-	 * @param data
-	 * @param dest
-	 * @param count
-	 * @return
-	 */
-	private byte[] unmarshalUuid(byte[] data, byte[] dest, int count) {
-		System.arraycopy(data, this.getCurrentPlace(), dest, 0, count);
-		this.setCurrentPlace(this.getCurrentPlace() + count);
-		this.setDataRemained(this.getDataRemained() - count);
-		return dest;
-	}
-
-	/**
-	 * Takes 8 bytes and reverses them
-	 * 
-	 * @param data
-	 * @param dest
-	 * @return
-	 */
-	private long unmarshalUint64(byte[] data, long dest) {
-		byte[] temp = new byte[8];
-		int i, j;
-
-		if (8 > this.getDataRemained())
-			throw new ChmParsingException("8 > this.getDataRemained()");
-
-		for (i = 8, j = 7; i > 0; i--) {
-			temp[j--] = data[this.getCurrentPlace()];
-			this.setCurrentPlace(this.getCurrentPlace() + 1);
-		}
-
-		dest = new BigInteger(temp).longValue();
-		this.setDataRemained(this.getDataRemained() - 8);
-		return dest;
-	}
-
-	private int unmarshalInt32(byte[] data, int dest) {
-		ChmAssert.assertByteArrayNotNull(data);
-
-		if (4 > this.getDataRemained())
-			throw new ChmParsingException("4 > dataLenght");
-		dest = data[this.getCurrentPlace()]
-				| data[this.getCurrentPlace() + 1] << 8
-				| data[this.getCurrentPlace() + 2] << 16
-				| data[this.getCurrentPlace() + 3] << 24;
-
-		this.setCurrentPlace(this.getCurrentPlace() + 4);
-		this.setDataRemained(this.getDataRemained() - 4);
-		return dest;
-	}
-
-	private long unmarshalUInt32(byte[] data, long dest) {
-		ChmAssert.assertByteArrayNotNull(data);
-		if (4 > getDataRemained())
-			throw new ChmParsingException("4 > dataLenght");
-		dest = data[this.getCurrentPlace()]
-				| data[this.getCurrentPlace() + 1] << 8
-				| data[this.getCurrentPlace() + 2] << 16
-				| data[this.getCurrentPlace() + 3] << 24;
-
-		setDataRemained(this.getDataRemained() - 4);
-		this.setCurrentPlace(this.getCurrentPlace() + 4);
-		return dest;
-	}
-
-	public static void main(String[] args) {
-	}
-
-	/**
-	 * Sets data remained to be processed
-	 * 
-	 * @param dataRemained
-	 */
-	private void setDataRemained(int dataRemained) {
-		this.dataRemained = dataRemained;
-	}
-
-	/**
-	 * Returns data remained
-	 * 
-	 * @return data_remainned
-	 */
-	private int getDataRemained() {
-		return dataRemained;
-	}
-
-	/**
-	 * Sets current place in the byte[]
-	 * 
-	 * @param currentPlace
-	 */
-	private void setCurrentPlace(int currentPlace) {
-		this.currentPlace = currentPlace;
-	}
-
-	/**
-	 * Returns current place in the byte[]
-	 * 
-	 * @return current place
-	 */
-	private int getCurrentPlace() {
-		return currentPlace;
-	}
-
-	// @Override
-	public void parse(byte[] data, ChmItsfHeader chmItsfHeader) {
-		if (data.length < ChmConstants.CHM_ITSF_V2_LEN
-				|| data.length > ChmConstants.CHM_ITSF_V3_LEN)
-			throw new ChmParsingException(
-					"we only know how to deal with the 0x58 and 0x60 byte structures");
-
-		chmItsfHeader.setDataRemained(data.length);
-		chmItsfHeader.unmarshalCharArray(data, chmItsfHeader,
-				ChmConstants.CHM_SIGNATURE_LEN);
-		chmItsfHeader.setVersion(chmItsfHeader.unmarshalInt32(data,
-				chmItsfHeader.getVersion()));
-		chmItsfHeader.setHeaderLen(chmItsfHeader.unmarshalInt32(data,
-				chmItsfHeader.getHeaderLen()));
-		chmItsfHeader.setUnknown_000c(chmItsfHeader.unmarshalInt32(data,
-				chmItsfHeader.getUnknown_000c()));
-		chmItsfHeader.setLastModified(chmItsfHeader.unmarshalUInt32(data,
-				chmItsfHeader.getLastModified()));
-		chmItsfHeader.setLangId(chmItsfHeader.unmarshalUInt32(data,
-				chmItsfHeader.getLangId()));
-		chmItsfHeader.setDir_uuid(chmItsfHeader.unmarshalUuid(data,
-				chmItsfHeader.getDir_uuid(), 16));
-		chmItsfHeader.setStream_uuid(chmItsfHeader.unmarshalUuid(data,
-				chmItsfHeader.getStream_uuid(), 16));
-		chmItsfHeader.setUnknownOffset(chmItsfHeader.unmarshalUint64(data,
-				chmItsfHeader.getUnknownOffset()));
-		chmItsfHeader.setUnknownLen(chmItsfHeader.unmarshalUint64(data,
-				chmItsfHeader.getUnknownLen()));
-		chmItsfHeader.setDirOffset(chmItsfHeader.unmarshalUint64(data,
-				chmItsfHeader.getDirOffset()));
-		chmItsfHeader.setDirLen(chmItsfHeader.unmarshalUint64(data,
-				chmItsfHeader.getDirLen()));
-
-		if (!new String(chmItsfHeader.getSignature()).equals(ChmConstants.ITSF))
-			throw new ChmParsingException("seems not valid file");
-		if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2) {
-			if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN)
-				throw new ChmParsingException("something wrong with header");
-		} else if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
-			if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V3_LEN)
-				throw new ChmParsingException("unknown v3 header lenght");
-		} else
-			throw new ChmParsingException("unsupported chm format");
-
-		/*
-		 * now, if we have a V3 structure, unmarshal the rest, otherwise,
-		 * compute it
-		 */
-		if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
-			if (chmItsfHeader.getDataRemained() >= 0)
-				chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset()
-						+ chmItsfHeader.getDirLen());
-			else
-				throw new ChmParsingException(
-						"cannot set data offset, no data remained");
-		} else
-			chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset()
-					+ chmItsfHeader.getDirLen());
-	}
+    private static final long serialVersionUID = 2215291838533213826L;
+    private byte[] signature = new String("ITSF").getBytes(); /* 0 (ITSF) */
+    private int version; /* 4 */
+    private int header_len; /* 8 */
+    private int unknown_000c; /* c */
+    private long last_modified; /* 10 */
+    private long lang_id; /* 14 */
+    private byte[] dir_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 18 */
+    private byte[] stream_uuid = new byte[ChmConstants.BYTE_ARRAY_LENGHT]; /* 28 */
+    private long unknown_offset; /* 38 */
+    private long unknown_len; /* 40 */
+    private long dir_offset; /* 48 */
+    private long dir_len; /* 50 */
+    private long data_offset; /* 58 (Not present before V3) */
+
+    /* local usage */
+    private int dataRemained;
+    private int currentPlace = 0;
+
+    /**
+     * Returns the header values separated by spaces; uuids print by content
+     */
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append(new String(getSignature())).append(' ');
+        sb.append(getVersion()).append(' ');
+        sb.append(getHeaderLen()).append(' ');
+        sb.append(getUnknown_000c()).append(' ');
+        sb.append(getLastModified()).append(' ');
+        sb.append(getLangId()).append(' ');
+        sb.append(java.util.Arrays.toString(getDir_uuid())).append(' ');
+        sb.append(java.util.Arrays.toString(getStream_uuid())).append(' ');
+        sb.append(getUnknownOffset()).append(' ');
+        sb.append(getUnknownLen()).append(' ');
+        sb.append(getDirOffset()).append(' ');
+        sb.append(getDirLen()).append(' ');
+        sb.append(getDataOffset()).append(' ');
+        return sb.toString();
+    }
+
+    /**
+     * Returns the signature bytes of the itsf header
+     * 
+     * @return the signature array itself, not a copy
+     */
+    public byte[] getSignature() {
+        return signature;
+    }
+
+    /**
+     * Sets itsf header signature
+     * 
+     * @param signature signature bytes; the array is stored, not copied
+     */
+    protected void setSignature(byte[] signature) {
+        this.signature = signature;
+    }
+
+    /**
+     * Returns itsf header version
+     * 
+     * @return the value of the version field
+     */
+    public int getVersion() {
+        return version;
+    }
+
+    /**
+     * Sets itsf version
+     * 
+     * @param version value to store in the version field
+     */
+    protected void setVersion(int version) {
+        this.version = version;
+    }
+
+    /**
+     * Returns itsf header length
+     * 
+     * @return the value of the header_len field
+     */
+    public int getHeaderLen() {
+        return header_len;
+    }
+
+    /**
+     * Sets itsf header length
+     * 
+     * @param header_len value to store in the header_len field
+     */
+    protected void setHeaderLen(int header_len) {
+        this.header_len = header_len;
+    }
+
+    /**
+     * Returns unknown_000c value
+     * 
+     * @return the value of the unknown_000c field
+     */
+    public int getUnknown_000c() {
+        return unknown_000c;
+    }
+
+    /**
+     * Sets unknown_000c
+     * 
+     * @param unknown_000c value to store in the unknown_000c field
+     */
+    protected void setUnknown_000c(int unknown_000c) {
+        this.unknown_000c = unknown_000c;
+    }
+
+    /**
+     * Returns the last modified value of the chm file
+     * 
+     * @return the value of the last_modified field, as a long
+     */
+    public long getLastModified() {
+        return last_modified;
+    }
+
+    /**
+     * Sets the last modified value of the chm file
+     * 
+     * @param last_modified value to store in the last_modified field
+     */
+    protected void setLastModified(long last_modified) {
+        this.last_modified = last_modified;
+    }
+
+    /**
+     * Returns language ID
+     * 
+     * @return the value of the lang_id field
+     */
+    public long getLangId() {
+        return lang_id;
+    }
+
+    /**
+     * Sets language ID
+     * 
+     * @param lang_id value to store in the lang_id field
+     */
+    protected void setLangId(long lang_id) {
+        this.lang_id = lang_id;
+    }
+
+    /**
+     * Returns directory uuid
+     * 
+     * @return the dir_uuid array itself, not a copy
+     */
+    public byte[] getDir_uuid() {
+        return dir_uuid;
+    }
+
+    /**
+     * Sets directory uuid
+     * 
+     * @param dir_uuid uuid bytes; the array is stored, not copied
+     */
+    protected void setDir_uuid(byte[] dir_uuid) {
+        this.dir_uuid = dir_uuid;
+    }
+
+    /**
+     * Returns stream uuid
+     * 
+     * @return the stream_uuid array itself, not a copy
+     */
+    public byte[] getStream_uuid() {
+        return stream_uuid;
+    }
+
+    /**
+     * Sets stream uuid
+     * 
+     * @param stream_uuid uuid bytes; the array is stored, not copied
+     */
+    protected void setStream_uuid(byte[] stream_uuid) {
+        this.stream_uuid = stream_uuid;
+    }
+
+    /**
+     * Returns unknown offset
+     * 
+     * @return the value of the unknown_offset field
+     */
+    public long getUnknownOffset() {
+        return unknown_offset;
+    }
+
+    /**
+     * Sets unknown offset
+     * 
+     * @param unknown_offset value to store in the unknown_offset field
+     */
+    protected void setUnknownOffset(long unknown_offset) {
+        this.unknown_offset = unknown_offset;
+    }
+
+    /**
+     * Returns unknown length
+     * 
+     * @return the value of the unknown_len field
+     */
+    public long getUnknownLen() {
+        return unknown_len;
+    }
+
+    /**
+     * Sets unknown length
+     * 
+     * @param unknown_len value to store in the unknown_len field
+     */
+    protected void setUnknownLen(long unknown_len) {
+        this.unknown_len = unknown_len;
+    }
+
+    /**
+     * Returns directory offset
+     * 
+     * @return the value of the dir_offset field
+     */
+    public long getDirOffset() {
+        return dir_offset;
+    }
+
+    /**
+     * Sets directory offset
+     * 
+     * @param dir_offset value to store in the dir_offset field
+     */
+    protected void setDirOffset(long dir_offset) {
+        this.dir_offset = dir_offset;
+    }
+
+    /**
+     * Returns directory length
+     * 
+     * @return the value of the dir_len field (the length, not the offset)
+     */
+    public long getDirLen() {
+        return dir_len;
+    }
+
+    /**
+     * Sets directory length
+     * 
+     * @param dir_len value to store in the dir_len field
+     */
+    protected void setDirLen(long dir_len) {
+        this.dir_len = dir_len;
+    }
+
+    /**
+     * Returns data offset
+     * 
+     * @return the value of the data_offset field
+     */
+    public long getDataOffset() {
+        return data_offset;
+    }
+
+    /**
+     * Sets data offset
+     * 
+     * @param data_offset value to store in the data_offset field
+     */
+    protected void setDataOffset(long data_offset) {
+        this.data_offset = data_offset;
+    }
+
+    /**
+     * Copies count bytes from the start of data into the header's signature,
+     * then moves this object's cursor and remaining-byte counter by count
+     * @param data source bytes
+     * @param chmItsfHeader header whose signature array receives the bytes
+     * @param count number of bytes to copy
+     */
+    private void unmarshalCharArray(byte[] data, ChmItsfHeader chmItsfHeader,
+            int count) {
+        ChmAssert.assertChmAccessorParameters(data, chmItsfHeader, count);
+        System.arraycopy(data, 0, chmItsfHeader.signature, 0, count);
+        setDataRemained(getDataRemained() - count);
+        setCurrentPlace(getCurrentPlace() + count);
+    }
+
+    /**
+     * Copies count bytes of data, starting at the current place, into dest and
+     * advances the cursor; a read beyond the remaining data is rejected up front
+     * @return dest, holding the copied bytes
+     * @throws ChmParsingException if count exceeds the remaining data
+     */
+    private byte[] unmarshalUuid(byte[] data, byte[] dest, int count) {
+        if (count > this.getDataRemained())
+            throw new ChmParsingException(count + " > this.getDataRemained()");
+        System.arraycopy(data, this.getCurrentPlace(), dest, 0, count);
+        this.setCurrentPlace(this.getCurrentPlace() + count);
+        this.setDataRemained(this.getDataRemained() - count);
+        return dest;
+    }
+
+    /**
+     * Reads the next 8 bytes as a little-endian 64-bit value and advances the
+     * cursor by 8.
+     * 
+     * @param data source bytes
+     * @param dest ignored; the value read is returned instead
+     * @return the 8 bytes interpreted least significant byte first
+     * @throws ChmParsingException if fewer than 8 bytes remain
+     */
+    private long unmarshalUint64(byte[] data, long dest) {
+        if (this.getDataRemained() < 8)
+            throw new ChmParsingException("8 > this.getDataRemained()");
+
+        long value = 0L;
+        for (int shift = 0; shift < 64; shift += 8) {
+            // least significant byte comes first in the input
+            value |= (data[this.getCurrentPlace()] & 0xffL) << shift;
+            this.setCurrentPlace(this.getCurrentPlace() + 1);
+        }
+
+        this.setDataRemained(this.getDataRemained() - 8);
+        return value;
+    }
+
+    /** Reads a little-endian 32-bit int at the cursor and advances it by 4; dest is ignored. */
+    private int unmarshalInt32(byte[] data, int dest) {
+        ChmAssert.assertByteArrayNotNull(data);
+        if (4 > this.getDataRemained())
+            throw new ChmParsingException("4 > dataLenght");
+        // mask each byte: a negative byte would otherwise sign-extend over the higher bytes
+        dest = (data[this.getCurrentPlace()] & 0xff)
+                | (data[this.getCurrentPlace() + 1] & 0xff) << 8
+                | (data[this.getCurrentPlace() + 2] & 0xff) << 16
+                | (data[this.getCurrentPlace() + 3] & 0xff) << 24;
+        this.setCurrentPlace(this.getCurrentPlace() + 4);
+        this.setDataRemained(this.getDataRemained() - 4);
+        return dest;
+    }
+
+    /** Reads an unsigned little-endian 32-bit value (bytes masked with 0xffL, so never negative); advances the cursor by 4. */
+    private long unmarshalUInt32(byte[] data, long dest) {
+        ChmAssert.assertByteArrayNotNull(data);
+        if (4 > getDataRemained())
+            throw new ChmParsingException("4 > dataLenght");
+        dest = (data[this.getCurrentPlace()] & 0xffL)
+                | (data[this.getCurrentPlace() + 1] & 0xffL) << 8
+                | (data[this.getCurrentPlace() + 2] & 0xffL) << 16
+                | (data[this.getCurrentPlace() + 3] & 0xffL) << 24;
+        setDataRemained(this.getDataRemained() - 4);
+        this.setCurrentPlace(this.getCurrentPlace() + 4);
+        return dest;
+    }
+
+    public static void main(String[] args) { // empty entry point, args ignored
+    }
+
+    /**
+     * Sets the number of input bytes that remain to be processed
+     * 
+     * @param dataRemained remaining byte count
+     */
+    private void setDataRemained(int dataRemained) {
+        this.dataRemained = dataRemained;
+    }
+
+    /**
+     * Returns the number of input bytes that remain to be processed
+     * 
+     * @return remaining byte count
+     */
+    private int getDataRemained() {
+        return dataRemained;
+    }
+
+    /**
+     * Sets current place in the byte[]
+     * 
+     * @param currentPlace index of the next byte to read
+     */
+    private void setCurrentPlace(int currentPlace) {
+        this.currentPlace = currentPlace;
+    }
+
+    /**
+     * Returns current place in the byte[]
+     * 
+     * @return index of the next byte to read
+     */
+    private int getCurrentPlace() {
+        return currentPlace;
+    }
+
+    // @Override - fills chmItsfHeader from the raw itsf header bytes in data, validating them
+    public void parse(byte[] data, ChmItsfHeader chmItsfHeader) {
+        if (data.length < ChmConstants.CHM_ITSF_V2_LEN
+                || data.length > ChmConstants.CHM_ITSF_V3_LEN)
+            throw new ChmParsingException(
+                    "we only know how to deal with the 0x58 and 0x60 byte structures");
+        // fields are read in order; each unmarshal call advances the header's cursor
+        chmItsfHeader.setDataRemained(data.length);
+        chmItsfHeader.unmarshalCharArray(data, chmItsfHeader,
+                ChmConstants.CHM_SIGNATURE_LEN);
+        chmItsfHeader.setVersion(chmItsfHeader.unmarshalInt32(data,
+                chmItsfHeader.getVersion()));
+        chmItsfHeader.setHeaderLen(chmItsfHeader.unmarshalInt32(data,
+                chmItsfHeader.getHeaderLen()));
+        chmItsfHeader.setUnknown_000c(chmItsfHeader.unmarshalInt32(data,
+                chmItsfHeader.getUnknown_000c()));
+        chmItsfHeader.setLastModified(chmItsfHeader.unmarshalUInt32(data,
+                chmItsfHeader.getLastModified()));
+        chmItsfHeader.setLangId(chmItsfHeader.unmarshalUInt32(data,
+                chmItsfHeader.getLangId()));
+        chmItsfHeader.setDir_uuid(chmItsfHeader.unmarshalUuid(data,
+                chmItsfHeader.getDir_uuid(), 16));
+        chmItsfHeader.setStream_uuid(chmItsfHeader.unmarshalUuid(data,
+                chmItsfHeader.getStream_uuid(), 16));
+        chmItsfHeader.setUnknownOffset(chmItsfHeader.unmarshalUint64(data,
+                chmItsfHeader.getUnknownOffset()));
+        chmItsfHeader.setUnknownLen(chmItsfHeader.unmarshalUint64(data,
+                chmItsfHeader.getUnknownLen()));
+        chmItsfHeader.setDirOffset(chmItsfHeader.unmarshalUint64(data,
+                chmItsfHeader.getDirOffset()));
+        chmItsfHeader.setDirLen(chmItsfHeader.unmarshalUint64(data,
+                chmItsfHeader.getDirLen()));
+        // validate the signature, the version and the declared header length
+        if (!new String(chmItsfHeader.getSignature()).equals(ChmConstants.ITSF))
+            throw new ChmParsingException("seems not valid file");
+        if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_2) {
+            if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V2_LEN)
+                throw new ChmParsingException("something wrong with header");
+        } else if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
+            if (chmItsfHeader.getHeaderLen() < ChmConstants.CHM_ITSF_V3_LEN)
+                throw new ChmParsingException("unknown v3 header lenght");
+        } else
+            throw new ChmParsingException("unsupported chm format");
+
+        /*
+         * NOTE(review): both branches compute dir offset + dir length; the V3
+         * branch does not read a stored data offset from data - confirm intent.
+         */
+        if (chmItsfHeader.getVersion() == ChmConstants.CHM_VER_3) {
+            if (chmItsfHeader.getDataRemained() >= 0)
+                chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset()
+                        + chmItsfHeader.getDirLen());
+            else
+                throw new ChmParsingException(
+                        "cannot set data offset, no data remained");
+        } else
+            chmItsfHeader.setDataOffset(chmItsfHeader.getDirOffset()
+                    + chmItsfHeader.getDirLen());
+    }
 }