You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/29 11:11:29 UTC
[23/39] tika git commit: Convert new lines from windows to unix

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxState.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxState.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxState.java
index 101b26b..51dc5a5 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxState.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/lzx/ChmLzxState.java
@@ -1,327 +1,327 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm.lzx;
-
-import java.util.concurrent.CancellationException;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.parser.chm.core.ChmCommons;
-import org.apache.tika.parser.chm.core.ChmCommons.IntelState;
-import org.apache.tika.parser.chm.core.ChmCommons.LzxState;
-import org.apache.tika.parser.chm.core.ChmConstants;
-import org.apache.tika.parser.chm.exception.ChmParsingException;
-
-public class ChmLzxState implements Cloneable {
-    /* Class' members */
-    private int window; /* the actual decoding window */
-    private long window_size; /* window size (32Kb through 2Mb) */
-    private int window_position; /* current offset within the window */
-    private int main_tree_elements; /* number of main tree elements */
-    private LzxState hadStarted; /* have we started decoding at all yet? */
-    private int block_type; /* type of this block */
-    private int block_length; /* uncompressed length of this block */
-    private int block_remaining; /* uncompressed bytes still left to decode */
-    private int frames_read; /* the number of CFDATA blocks processed */
-    private int intel_file_size; /* magic header value used for transform */
-    private long intel_current_possition; /* current offset in transform space */
-    private IntelState intel_state; /* have we seen any translatable data yet? */
-    private long R0; /* for the LRU offset system */
-    private long R1; /* for the LRU offset system */
-    private long R2; /* for the LRU offset system */
-
-    // Trees - PRETREE, MAINTREE, LENGTH, ALIGNED
-    protected short[] mainTreeLengtsTable;
-    protected short[] mainTreeTable;
-
-    protected short[] lengthTreeTable;
-    protected short[] lengthTreeLengtsTable;
-
-    protected short[] alignedLenTable;
-    protected short[] alignedTreeTable;
-
-    @Override
-    public ChmLzxState clone() {
-        try {
-          ChmLzxState clone = (ChmLzxState)super.clone();
-          clone.mainTreeLengtsTable = arrayClone(mainTreeLengtsTable);
-          clone.mainTreeTable = arrayClone(mainTreeTable);
-          clone.lengthTreeTable = arrayClone(lengthTreeTable);
-          clone.lengthTreeLengtsTable = arrayClone(lengthTreeLengtsTable);
-          clone.alignedLenTable = arrayClone(alignedLenTable);
-          clone.alignedTreeTable = arrayClone(alignedTreeTable);
-          return clone;
-        } catch (CloneNotSupportedException ex) {
-           return null;
-        }
-    }
-    
-    protected short[] getMainTreeTable() {
-        return mainTreeTable;
-    }
-
-    protected short[] getAlignedTreeTable() {
-        return alignedTreeTable;
-    }
-
-    protected void setAlignedTreeTable(short[] alignedTreeTable) {
-        this.alignedTreeTable = alignedTreeTable;
-    }
-
-    protected short[] getLengthTreeTable() throws TikaException {
-        if (lengthTreeTable != null)
-            return this.lengthTreeTable;
-        else
-            throw new ChmParsingException("lengthTreeTable is null");
-    }
-
-    protected void setLengthTreeTable(short[] lengthTreeTable) {
-        this.lengthTreeTable = lengthTreeTable;
-    }
-
-    protected void setMainTreeTable(short[] mainTreeTable) {
-        this.mainTreeTable = mainTreeTable;
-    }
-
-    protected short[] getAlignedLenTable() {
-        return this.alignedLenTable;
-    }
-
-    protected void setAlignedLenTable(short[] alignedLenTable) {
-        this.alignedLenTable = alignedLenTable;
-    }
-
-    /**
-     * It suits for informative outlook
-     */
-    public String toString() {
-        StringBuilder sb = new StringBuilder();
-        sb.append("actual decoding window:=" + getWindow()
-                + System.getProperty("line.separator"));
-        sb.append("window size (32Kb through 2Mb):=" + getWindowSize()
-                + System.getProperty("line.separator"));
-        sb.append("current offset within the window:=" + getWindowPosition()
-                + System.getProperty("line.separator"));
-        sb.append("number of main tree elements:=" + getMainTreeElements()
-                + System.getProperty("line.separator"));
-        sb.append("have we started decoding at all yet?:=" + getHadStarted()
-                + System.getProperty("line.separator"));
-        sb.append("type of this block:=" + getBlockType()
-                + System.getProperty("line.separator"));
-        sb.append("uncompressed length of this block:=" + getBlockLength()
-                + System.getProperty("line.separator"));
-        sb.append("uncompressed bytes still left to decode:="
-                + getBlockRemaining() + System.getProperty("line.separator"));
-        sb.append("the number of CFDATA blocks processed:=" + getFramesRead()
-                + System.getProperty("line.separator"));
-        sb.append("magic header value used for transform:="
-                + getIntelFileSize() + System.getProperty("line.separator"));
-        sb.append("current offset in transform space:="
-                + getIntelCurrentPossition()
-                + System.getProperty("line.separator"));
-        sb.append("have we seen any translatable data yet?:=" + getIntelState()
-                + System.getProperty("line.separator"));
-        sb.append("R0 for the LRU offset system:=" + getR0()
-                + System.getProperty("line.separator"));
-        sb.append("R1 for the LRU offset system:=" + getR1()
-                + System.getProperty("line.separator"));
-        sb.append("R2 for the LRU offset system:=" + getR2()
-                + System.getProperty("line.separator"));
-        sb.append("main tree length:=" + getMainTreeLengtsTable().length
-                + System.getProperty("line.separator"));
-        sb.append("secondary tree length:=" + getLengthTreeLengtsTable().length
-                + System.getProperty("line.separator"));
-        return sb.toString();
-    }
-
-    public ChmLzxState(int window) throws TikaException {
-        if (window >= 0) {
-            int position_slots;
-            int win = ChmCommons.getWindowSize(window);
-            setWindowSize(1 << win);
-            /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */
-            if (win < 15 || win > 21)
-                throw new ChmParsingException("window less than 15 or window greater than 21");
-
-            /* Calculates required position slots */
-            if (win == 20)
-                position_slots = 42;
-            else if (win == 21)
-                position_slots = 50;
-            else
-                position_slots = win << 1;
-            //TODO: position_slots is not used ?
-            setR0(1);
-            setR1(1);
-            setR2(1);
-            setMainTreeElements(512);
-            setHadStarted(LzxState.NOT_STARTED_DECODING);
-            setFramesRead(0);
-            setBlockRemaining(0);
-            setBlockType(ChmConstants.LZX_BLOCKTYPE_INVALID);
-            setIntelCurrentPossition(0);
-            setIntelState(IntelState.NOT_STARTED);
-            setWindowPosition(0);
-            setMainTreeLengtsTable(new short[getMainTreeElements()]);
-            setLengthTreeLengtsTable(new short[ChmConstants.LZX_NUM_SECONDARY_LENGTHS]);
-        } else
-            throw new CancellationException(
-                    "window size should be more than zero");
-    }
-
-    protected void setWindow(int window) {
-        this.window = window;
-    }
-
-    protected int getWindow() {
-        return window;
-    }
-
-    protected void setWindowSize(long window_size) {
-        this.window_size = window_size;
-    }
-
-    protected long getWindowSize() {
-        return window_size;
-    }
-
-    protected void setWindowPosition(int window_position) {
-        this.window_position = window_position;
-    }
-
-    protected int getWindowPosition() {
-        return window_position;
-    }
-
-    protected void setMainTreeElements(int main_tree_elements) {
-        this.main_tree_elements = main_tree_elements;
-    }
-
-    protected int getMainTreeElements() {
-        return main_tree_elements;
-    }
-
-    protected void setHadStarted(LzxState hadStarted) {
-        this.hadStarted = hadStarted;
-    }
-
-    protected LzxState getHadStarted() {
-        return hadStarted;
-    }
-
-    protected void setBlockType(int block_type) {
-        this.block_type = block_type;
-    }
-
-    public int getBlockType() {
-        return block_type;
-    }
-
-    protected void setBlockLength(int block_length) {
-        this.block_length = block_length;
-    }
-
-    protected int getBlockLength() {
-        return block_length;
-    }
-
-    protected void setBlockRemaining(int block_remaining) {
-        this.block_remaining = block_remaining;
-    }
-
-    protected int getBlockRemaining() {
-        return block_remaining;
-    }
-
-    protected void setFramesRead(int frames_read) {
-        this.frames_read = frames_read;
-    }
-
-    protected void increaseFramesRead() {
-        this.frames_read = getFramesRead() + 1;
-    }
-
-    protected int getFramesRead() {
-        return frames_read;
-    }
-
-    protected void setIntelFileSize(int intel_file_size) {
-        this.intel_file_size = intel_file_size;
-    }
-
-    protected int getIntelFileSize() {
-        return intel_file_size;
-    }
-
-    protected void setIntelCurrentPossition(long intel_current_possition) {
-        this.intel_current_possition = intel_current_possition;
-    }
-
-    protected long getIntelCurrentPossition() {
-        return intel_current_possition;
-    }
-
-    protected void setIntelState(IntelState intel_state) {
-        this.intel_state = intel_state;
-    }
-
-    protected IntelState getIntelState() {
-        return intel_state;
-    }
-
-    protected void setR0(long r0) {
-        R0 = r0;
-    }
-
-    protected long getR0() {
-        return R0;
-    }
-
-    protected void setR1(long r1) {
-        R1 = r1;
-    }
-
-    protected long getR1() {
-        return R1;
-    }
-
-    protected void setR2(long r2) {
-        R2 = r2;
-    }
-
-    protected long getR2() {
-        return R2;
-    }
-
-    public void setMainTreeLengtsTable(short[] mainTreeLengtsTable) {
-        this.mainTreeLengtsTable = mainTreeLengtsTable;
-    }
-
-    public short[] getMainTreeLengtsTable() {
-        return mainTreeLengtsTable;
-    }
-
-    public void setLengthTreeLengtsTable(short[] lengthTreeLengtsTable) {
-        this.lengthTreeLengtsTable = lengthTreeLengtsTable;
-    }
-
-    public short[] getLengthTreeLengtsTable() {
-        return lengthTreeLengtsTable;
-    }
-    
-    private static short[] arrayClone(short[] a) {
-        return a==null ? null : (short[]) a.clone();
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.lzx;
+
+import java.util.concurrent.CancellationException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.chm.core.ChmCommons;
+import org.apache.tika.parser.chm.core.ChmCommons.IntelState;
+import org.apache.tika.parser.chm.core.ChmCommons.LzxState;
+import org.apache.tika.parser.chm.core.ChmConstants;
+import org.apache.tika.parser.chm.exception.ChmParsingException;
+
+public class ChmLzxState implements Cloneable {
+    /* Class' members */
+    private int window; /* the actual decoding window */
+    private long window_size; /* window size (32Kb through 2Mb) */
+    private int window_position; /* current offset within the window */
+    private int main_tree_elements; /* number of main tree elements */
+    private LzxState hadStarted; /* have we started decoding at all yet? */
+    private int block_type; /* type of this block */
+    private int block_length; /* uncompressed length of this block */
+    private int block_remaining; /* uncompressed bytes still left to decode */
+    private int frames_read; /* the number of CFDATA blocks processed */
+    private int intel_file_size; /* magic header value used for transform */
+    private long intel_current_possition; /* current offset in transform space */
+    private IntelState intel_state; /* have we seen any translatable data yet? */
+    private long R0; /* for the LRU offset system */
+    private long R1; /* for the LRU offset system */
+    private long R2; /* for the LRU offset system */
+
+    // Trees - PRETREE, MAINTREE, LENGTH, ALIGNED
+    protected short[] mainTreeLengtsTable;
+    protected short[] mainTreeTable;
+
+    protected short[] lengthTreeTable;
+    protected short[] lengthTreeLengtsTable;
+
+    protected short[] alignedLenTable;
+    protected short[] alignedTreeTable;
+
+    @Override
+    public ChmLzxState clone() {
+        try {
+          ChmLzxState clone = (ChmLzxState)super.clone();
+          clone.mainTreeLengtsTable = arrayClone(mainTreeLengtsTable);
+          clone.mainTreeTable = arrayClone(mainTreeTable);
+          clone.lengthTreeTable = arrayClone(lengthTreeTable);
+          clone.lengthTreeLengtsTable = arrayClone(lengthTreeLengtsTable);
+          clone.alignedLenTable = arrayClone(alignedLenTable);
+          clone.alignedTreeTable = arrayClone(alignedTreeTable);
+          return clone;
+        } catch (CloneNotSupportedException ex) {
+           return null;
+        }
+    }
+    
+    protected short[] getMainTreeTable() {
+        return mainTreeTable;
+    }
+
+    protected short[] getAlignedTreeTable() {
+        return alignedTreeTable;
+    }
+
+    protected void setAlignedTreeTable(short[] alignedTreeTable) {
+        this.alignedTreeTable = alignedTreeTable;
+    }
+
+    protected short[] getLengthTreeTable() throws TikaException {
+        if (lengthTreeTable != null)
+            return this.lengthTreeTable;
+        else
+            throw new ChmParsingException("lengthTreeTable is null");
+    }
+
+    protected void setLengthTreeTable(short[] lengthTreeTable) {
+        this.lengthTreeTable = lengthTreeTable;
+    }
+
+    protected void setMainTreeTable(short[] mainTreeTable) {
+        this.mainTreeTable = mainTreeTable;
+    }
+
+    protected short[] getAlignedLenTable() {
+        return this.alignedLenTable;
+    }
+
+    protected void setAlignedLenTable(short[] alignedLenTable) {
+        this.alignedLenTable = alignedLenTable;
+    }
+
+    /**
+     * It suits for informative outlook
+     */
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("actual decoding window:=" + getWindow()
+                + System.getProperty("line.separator"));
+        sb.append("window size (32Kb through 2Mb):=" + getWindowSize()
+                + System.getProperty("line.separator"));
+        sb.append("current offset within the window:=" + getWindowPosition()
+                + System.getProperty("line.separator"));
+        sb.append("number of main tree elements:=" + getMainTreeElements()
+                + System.getProperty("line.separator"));
+        sb.append("have we started decoding at all yet?:=" + getHadStarted()
+                + System.getProperty("line.separator"));
+        sb.append("type of this block:=" + getBlockType()
+                + System.getProperty("line.separator"));
+        sb.append("uncompressed length of this block:=" + getBlockLength()
+                + System.getProperty("line.separator"));
+        sb.append("uncompressed bytes still left to decode:="
+                + getBlockRemaining() + System.getProperty("line.separator"));
+        sb.append("the number of CFDATA blocks processed:=" + getFramesRead()
+                + System.getProperty("line.separator"));
+        sb.append("magic header value used for transform:="
+                + getIntelFileSize() + System.getProperty("line.separator"));
+        sb.append("current offset in transform space:="
+                + getIntelCurrentPossition()
+                + System.getProperty("line.separator"));
+        sb.append("have we seen any translatable data yet?:=" + getIntelState()
+                + System.getProperty("line.separator"));
+        sb.append("R0 for the LRU offset system:=" + getR0()
+                + System.getProperty("line.separator"));
+        sb.append("R1 for the LRU offset system:=" + getR1()
+                + System.getProperty("line.separator"));
+        sb.append("R2 for the LRU offset system:=" + getR2()
+                + System.getProperty("line.separator"));
+        sb.append("main tree length:=" + getMainTreeLengtsTable().length
+                + System.getProperty("line.separator"));
+        sb.append("secondary tree length:=" + getLengthTreeLengtsTable().length
+                + System.getProperty("line.separator"));
+        return sb.toString();
+    }
+
+    public ChmLzxState(int window) throws TikaException {
+        if (window >= 0) {
+            int position_slots;
+            int win = ChmCommons.getWindowSize(window);
+            setWindowSize(1 << win);
+            /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */
+            if (win < 15 || win > 21)
+                throw new ChmParsingException("window less than 15 or window greater than 21");
+
+            /* Calculates required position slots */
+            if (win == 20)
+                position_slots = 42;
+            else if (win == 21)
+                position_slots = 50;
+            else
+                position_slots = win << 1;
+            //TODO: position_slots is not used ?
+            setR0(1);
+            setR1(1);
+            setR2(1);
+            setMainTreeElements(512);
+            setHadStarted(LzxState.NOT_STARTED_DECODING);
+            setFramesRead(0);
+            setBlockRemaining(0);
+            setBlockType(ChmConstants.LZX_BLOCKTYPE_INVALID);
+            setIntelCurrentPossition(0);
+            setIntelState(IntelState.NOT_STARTED);
+            setWindowPosition(0);
+            setMainTreeLengtsTable(new short[getMainTreeElements()]);
+            setLengthTreeLengtsTable(new short[ChmConstants.LZX_NUM_SECONDARY_LENGTHS]);
+        } else
+            throw new CancellationException(
+                    "window size should be more than zero");
+    }
+
+    protected void setWindow(int window) {
+        this.window = window;
+    }
+
+    protected int getWindow() {
+        return window;
+    }
+
+    protected void setWindowSize(long window_size) {
+        this.window_size = window_size;
+    }
+
+    protected long getWindowSize() {
+        return window_size;
+    }
+
+    protected void setWindowPosition(int window_position) {
+        this.window_position = window_position;
+    }
+
+    protected int getWindowPosition() {
+        return window_position;
+    }
+
+    protected void setMainTreeElements(int main_tree_elements) {
+        this.main_tree_elements = main_tree_elements;
+    }
+
+    protected int getMainTreeElements() {
+        return main_tree_elements;
+    }
+
+    protected void setHadStarted(LzxState hadStarted) {
+        this.hadStarted = hadStarted;
+    }
+
+    protected LzxState getHadStarted() {
+        return hadStarted;
+    }
+
+    protected void setBlockType(int block_type) {
+        this.block_type = block_type;
+    }
+
+    public int getBlockType() {
+        return block_type;
+    }
+
+    protected void setBlockLength(int block_length) {
+        this.block_length = block_length;
+    }
+
+    protected int getBlockLength() {
+        return block_length;
+    }
+
+    protected void setBlockRemaining(int block_remaining) {
+        this.block_remaining = block_remaining;
+    }
+
+    protected int getBlockRemaining() {
+        return block_remaining;
+    }
+
+    protected void setFramesRead(int frames_read) {
+        this.frames_read = frames_read;
+    }
+
+    protected void increaseFramesRead() {
+        this.frames_read = getFramesRead() + 1;
+    }
+
+    protected int getFramesRead() {
+        return frames_read;
+    }
+
+    protected void setIntelFileSize(int intel_file_size) {
+        this.intel_file_size = intel_file_size;
+    }
+
+    protected int getIntelFileSize() {
+        return intel_file_size;
+    }
+
+    protected void setIntelCurrentPossition(long intel_current_possition) {
+        this.intel_current_possition = intel_current_possition;
+    }
+
+    protected long getIntelCurrentPossition() {
+        return intel_current_possition;
+    }
+
+    protected void setIntelState(IntelState intel_state) {
+        this.intel_state = intel_state;
+    }
+
+    protected IntelState getIntelState() {
+        return intel_state;
+    }
+
+    protected void setR0(long r0) {
+        R0 = r0;
+    }
+
+    protected long getR0() {
+        return R0;
+    }
+
+    protected void setR1(long r1) {
+        R1 = r1;
+    }
+
+    protected long getR1() {
+        return R1;
+    }
+
+    protected void setR2(long r2) {
+        R2 = r2;
+    }
+
+    protected long getR2() {
+        return R2;
+    }
+
+    public void setMainTreeLengtsTable(short[] mainTreeLengtsTable) {
+        this.mainTreeLengtsTable = mainTreeLengtsTable;
+    }
+
+    public short[] getMainTreeLengtsTable() {
+        return mainTreeLengtsTable;
+    }
+
+    public void setLengthTreeLengtsTable(short[] lengthTreeLengtsTable) {
+        this.lengthTreeLengtsTable = lengthTreeLengtsTable;
+    }
+
+    public short[] getLengthTreeLengtsTable() {
+        return lengthTreeLengtsTable;
+    }
+    
+    private static short[] arrayClone(short[] a) {
+        return a==null ? null : (short[]) a.clone();
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/lzx/ChmSection.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/lzx/ChmSection.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/lzx/ChmSection.java
index c8944be..77f9b3a 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/lzx/ChmSection.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/chm/lzx/ChmSection.java
@@ -1,222 +1,222 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.chm.lzx;
-
-import java.math.BigInteger;
-import java.util.Arrays;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.parser.chm.core.ChmCommons;
-
-public class ChmSection {
-    final private byte[] data;
-    final private byte[] prevcontent;
-    private int swath;// kiks
-    private int total;// remains
-    private int buffer;// val
-
-    public ChmSection(byte[] data) throws TikaException {
-        this(data, null);
-    }
-
-    public ChmSection(byte[] data, byte[] prevconent) throws TikaException {
-        ChmCommons.assertByteArrayNotNull(data);
-        this.data = data;
-        this.prevcontent = prevconent;
-        //setData(data);
-    }
-    
-    /* Utilities */
-    public byte[] reverseByteOrder(byte[] toBeReversed) throws TikaException {
-        ChmCommons.assertByteArrayNotNull(toBeReversed);
-        ChmCommons.reverse(toBeReversed);
-        return toBeReversed;
-    }
-
-    public int checkBit(int i) {
-        return ((getBuffer() & (1 << (getTotal() - i))) == 0) ? 0 : 1;
-    }
-
-    public int getSyncBits(int bit) {
-        return getDesyncBits(bit, bit);
-    }
-
-    public int peekBits(int bit) {
-        return getDesyncBits(bit, 0);
-    }
-    
-    private int getDesyncBits(int bit, int removeBit) {
-        while (getTotal() < 16) {
-            setBuffer((getBuffer() << 16) + unmarshalUByte()
-                    + (unmarshalUByte() << 8));
-            setTotal(getTotal() + 16);
-        }
-        int tmp = (getBuffer() >>> (getTotal() - bit));
-        setTotal(getTotal() - removeBit);
-        setBuffer(getBuffer() - ((getBuffer() >>> getTotal()) << getTotal()));
-        return tmp;
-    }
-
-    public int unmarshalUByte() {
-        return getByte() & 255;
-    }
-
-    public byte getByte() {
-        if (getSwath() < getData().length) {
-            setSwath(getSwath() + 1);
-            return getData()[getSwath() - 1];
-        } else
-            return 0;
-    }
-
-    public int getLeft() {
-        return (getData().length - getSwath());
-    }
-
-    public byte[] getData() {
-        return data;
-    }
-
-    public byte[] getPrevContent() {
-        return prevcontent;
-    }
-    
-    public BigInteger getBigInteger(int i) {
-        if (getData() == null)
-            return BigInteger.ZERO;
-        if (getData().length - getSwath() < i)
-            i = getData().length - getSwath();
-        byte[] tmp = new byte[i];
-        for (int j = i - 1; j >= 0; j--) {
-            tmp[i - j - 1] = getData()[getSwath() + j];
-        }
-        setSwath(getSwath() + i);
-        return new BigInteger(tmp);
-    }
-
-    public byte[] stringToAsciiBytes(String s) {
-        char[] c = s.toCharArray();
-        byte[] byteval = new byte[c.length];
-        for (int i = 0; i < c.length; i++)
-            byteval[i] = (byte) c[i];
-        return byteval;
-    }
-
-    public BigInteger unmarshalUlong() {
-        return getBigInteger(8);
-    }
-
-    public long unmarshalUInt() {
-        return getBigInteger(4).longValue();
-    }
-
-    public int unmarshalInt() {
-        return getBigInteger(4).intValue();
-    }
-
-    public byte[] unmarshalBytes(int i) {
-        if (i == 0)
-            return new byte[1];
-        byte[] t = new byte[i];
-        for (int j = 0; j < i; j++)
-            t[j] = getData()[j + getSwath()];
-        setSwath(getSwath() + i);
-        return t;
-    }
-
-    public BigInteger getEncint() {
-        byte ob;
-        BigInteger bi = BigInteger.ZERO;
-        byte[] nb = new byte[1];
-        while ((ob = this.getByte()) < 0) {
-            nb[0] = (byte) ((ob & 0x7f));
-            bi = bi.shiftLeft(7).add(new BigInteger(nb));
-        }
-        nb[0] = (byte) ((ob & 0x7f));
-        bi = bi.shiftLeft(7).add(new BigInteger(nb));
-        return bi;
-    }
-
-    public char unmarshalUtfChar() {
-        byte ob;
-        int i = 1;
-        byte[] ba;
-        ob = this.getByte();
-        if (ob < 0) {
-            i = 2;
-            while ((ob << (24 + i)) < 0)
-                i++;
-        }
-        ba = new byte[i];
-        ba[0] = ob;
-        int j = 1;
-        while (j < i) {
-            ba[j] = this.getByte();
-            j++;
-        }
-        i = ba.length;
-        if (i == 1)
-            return (char) ba[0];
-        else {
-            int n;
-            n = ba[0] & 15; // 00001111b, gets last 4 bits
-            j = 1;
-            while (j < i)
-                n = (n << 6) + (ba[j++] & 63);// 00111111b,gets last 6 bits
-            return (char) n;
-        }
-    }
-
-//    private void setData(byte[] data) {
-//        this.data = data;
-//    }
-
-    public int getSwath() {
-        return swath;
-    }
-
-    public void setSwath(int swath) {
-        this.swath = swath;
-    }
-
-    public int getTotal() {
-        return total;
-    }
-
-    public void setTotal(int total) {
-        this.total = total;
-    }
-
-    private int getBuffer() {
-        return buffer;
-    }
-
-    private void setBuffer(int buffer) {
-        this.buffer = buffer;
-    }
-
-    /**
-     * @param args
-     * @throws TikaException 
-     */
-    public static void main(String[] args) throws TikaException {
-        byte[] array = { 4, 78, -67, 90, 1, -33 };
-        ChmSection chmSection = new ChmSection(array);
-        System.out.println("before " + Arrays.toString(array));
-        System.out.println("after " + Arrays.toString(chmSection.reverseByteOrder(array)));
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.chm.lzx;
+
+import java.math.BigInteger;
+import java.util.Arrays;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.chm.core.ChmCommons;
+
+public class ChmSection {
+    final private byte[] data;
+    final private byte[] prevcontent;
+    private int swath;// kiks
+    private int total;// remains
+    private int buffer;// val
+
+    public ChmSection(byte[] data) throws TikaException {
+        this(data, null);
+    }
+
+    public ChmSection(byte[] data, byte[] prevconent) throws TikaException {
+        ChmCommons.assertByteArrayNotNull(data);
+        this.data = data;
+        this.prevcontent = prevconent;
+        //setData(data);
+    }
+    
+    /* Utilities */
+    public byte[] reverseByteOrder(byte[] toBeReversed) throws TikaException {
+        ChmCommons.assertByteArrayNotNull(toBeReversed);
+        ChmCommons.reverse(toBeReversed);
+        return toBeReversed;
+    }
+
+    public int checkBit(int i) {
+        return ((getBuffer() & (1 << (getTotal() - i))) == 0) ? 0 : 1;
+    }
+
+    public int getSyncBits(int bit) {
+        return getDesyncBits(bit, bit);
+    }
+
+    public int peekBits(int bit) {
+        return getDesyncBits(bit, 0);
+    }
+    
+    private int getDesyncBits(int bit, int removeBit) {
+        while (getTotal() < 16) {
+            setBuffer((getBuffer() << 16) + unmarshalUByte()
+                    + (unmarshalUByte() << 8));
+            setTotal(getTotal() + 16);
+        }
+        int tmp = (getBuffer() >>> (getTotal() - bit));
+        setTotal(getTotal() - removeBit);
+        setBuffer(getBuffer() - ((getBuffer() >>> getTotal()) << getTotal()));
+        return tmp;
+    }
+
+    public int unmarshalUByte() {
+        return getByte() & 255;
+    }
+
+    public byte getByte() {
+        if (getSwath() < getData().length) {
+            setSwath(getSwath() + 1);
+            return getData()[getSwath() - 1];
+        } else
+            return 0;
+    }
+
+    public int getLeft() {
+        return (getData().length - getSwath());
+    }
+
+    public byte[] getData() {
+        return data;
+    }
+
+    public byte[] getPrevContent() {
+        return prevcontent;
+    }
+    
+    public BigInteger getBigInteger(int i) {
+        if (getData() == null)
+            return BigInteger.ZERO;
+        if (getData().length - getSwath() < i)
+            i = getData().length - getSwath();
+        byte[] tmp = new byte[i];
+        for (int j = i - 1; j >= 0; j--) {
+            tmp[i - j - 1] = getData()[getSwath() + j];
+        }
+        setSwath(getSwath() + i);
+        return new BigInteger(tmp);
+    }
+
+    public byte[] stringToAsciiBytes(String s) {
+        char[] c = s.toCharArray();
+        byte[] byteval = new byte[c.length];
+        for (int i = 0; i < c.length; i++)
+            byteval[i] = (byte) c[i];
+        return byteval;
+    }
+
+    public BigInteger unmarshalUlong() {
+        return getBigInteger(8);
+    }
+
+    public long unmarshalUInt() {
+        return getBigInteger(4).longValue();
+    }
+
+    public int unmarshalInt() {
+        return getBigInteger(4).intValue();
+    }
+
+    public byte[] unmarshalBytes(int i) {
+        if (i == 0)
+            return new byte[1];
+        byte[] t = new byte[i];
+        for (int j = 0; j < i; j++)
+            t[j] = getData()[j + getSwath()];
+        setSwath(getSwath() + i);
+        return t;
+    }
+
+    public BigInteger getEncint() {
+        byte ob;
+        BigInteger bi = BigInteger.ZERO;
+        byte[] nb = new byte[1];
+        while ((ob = this.getByte()) < 0) {
+            nb[0] = (byte) ((ob & 0x7f));
+            bi = bi.shiftLeft(7).add(new BigInteger(nb));
+        }
+        nb[0] = (byte) ((ob & 0x7f));
+        bi = bi.shiftLeft(7).add(new BigInteger(nb));
+        return bi;
+    }
+
+    public char unmarshalUtfChar() {
+        byte ob;
+        int i = 1;
+        byte[] ba;
+        ob = this.getByte();
+        if (ob < 0) {
+            i = 2;
+            while ((ob << (24 + i)) < 0)
+                i++;
+        }
+        ba = new byte[i];
+        ba[0] = ob;
+        int j = 1;
+        while (j < i) {
+            ba[j] = this.getByte();
+            j++;
+        }
+        i = ba.length;
+        if (i == 1)
+            return (char) ba[0];
+        else {
+            int n;
+            n = ba[0] & 15; // 00001111b, gets last 4 bits
+            j = 1;
+            while (j < i)
+                n = (n << 6) + (ba[j++] & 63);// 00111111b,gets last 6 bits
+            return (char) n;
+        }
+    }
+
+//    private void setData(byte[] data) {
+//        this.data = data;
+//    }
+
+    public int getSwath() {
+        return swath;
+    }
+
+    public void setSwath(int swath) {
+        this.swath = swath;
+    }
+
+    public int getTotal() {
+        return total;
+    }
+
+    public void setTotal(int total) {
+        this.total = total;
+    }
+
+    private int getBuffer() {
+        return buffer;
+    }
+
+    private void setBuffer(int buffer) {
+        this.buffer = buffer;
+    }
+
+    /**
+     * @param args
+     * @throws TikaException 
+     */
+    public static void main(String[] args) throws TikaException {
+        byte[] array = { 4, 78, -67, 90, 1, -33 };
+        ChmSection chmSection = new ChmSection(array);
+        System.out.println("before " + Arrays.toString(array));
+        System.out.println("after " + Arrays.toString(chmSection.reverseByteOrder(array)));
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/mbox/MboxParser.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
index 0e0e3da..86b1dd4 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
@@ -1,209 +1,209 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.mbox;
-
-import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Queue;
-import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.extractor.EmbeddedDocumentExtractor;
-import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * Mbox (mailbox) parser. This version extracts each mail from Mbox and uses the
- * DelegatingParser to process each mail.
- */
-public class MboxParser extends AbstractParser {
-
-    public static final String MBOX_MIME_TYPE = "application/mbox";
-    public static final String MBOX_RECORD_DIVIDER = "From ";
-    public static final int MAIL_MAX_SIZE = 50000000;
-    /**
-     * Serial version UID
-     */
-    private static final long serialVersionUID = -1762689436731160661L;
-    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.application("mbox"));
-    private static final Pattern EMAIL_HEADER_PATTERN = Pattern.compile("([^ ]+):[ \t]*(.*)");
-    private static final Pattern EMAIL_ADDRESS_PATTERN = Pattern.compile("<(.*@.*)>");
-
-    private static final String EMAIL_HEADER_METADATA_PREFIX = "MboxParser-";
-    private static final String EMAIL_FROMLINE_METADATA = EMAIL_HEADER_METADATA_PREFIX + "from";
-    private final Map<Integer, Metadata> trackingMetadata = new HashMap<Integer, Metadata>();
-    private boolean tracking = false;
-
-    public static Date parseDate(String headerContent) throws ParseException {
-        SimpleDateFormat dateFormat = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z", Locale.US);
-        return dateFormat.parse(headerContent);
-    }
-
-    public Set<MediaType> getSupportedTypes(ParseContext context) {
-        return SUPPORTED_TYPES;
-    }
-
-    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
-            throws IOException, TikaException, SAXException {
-
-        EmbeddedDocumentExtractor extractor = context.get(EmbeddedDocumentExtractor.class,
-                new ParsingEmbeddedDocumentExtractor(context));
-
-        String charsetName = "windows-1252";
-
-        metadata.set(Metadata.CONTENT_TYPE, MBOX_MIME_TYPE);
-        metadata.set(Metadata.CONTENT_ENCODING, charsetName);
-
-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
-        xhtml.startDocument();
-
-        InputStreamReader isr = new InputStreamReader(stream, charsetName);
-        try (BufferedReader reader = new BufferedReader(isr)) {
-            String curLine = reader.readLine();
-            int mailItem = 0;
-            do {
-                if (curLine.startsWith(MBOX_RECORD_DIVIDER)) {
-                    Metadata mailMetadata = new Metadata();
-                    Queue<String> multiline = new LinkedList<String>();
-                    mailMetadata.add(EMAIL_FROMLINE_METADATA, curLine.substring(MBOX_RECORD_DIVIDER.length()));
-                    mailMetadata.set(Metadata.CONTENT_TYPE, "message/rfc822");
-                    curLine = reader.readLine();
-
-                    ByteArrayOutputStream message = new ByteArrayOutputStream(100000);
-                    do {
-                        if (curLine.startsWith(" ") || curLine.startsWith("\t")) {
-                            String latestLine = multiline.poll();
-                            latestLine += " " + curLine.trim();
-                            multiline.add(latestLine);
-                        } else {
-                            multiline.add(curLine);
-                        }
-
-                        message.write(curLine.getBytes(charsetName));
-                        message.write(0x0A);
-                        curLine = reader.readLine();
-                    }
-                    while (curLine != null && !curLine.startsWith(MBOX_RECORD_DIVIDER) && message.size() < MAIL_MAX_SIZE);
-
-                    for (String item : multiline) {
-                        saveHeaderInMetadata(mailMetadata, item);
-                    }
-
-                    ByteArrayInputStream messageStream = new ByteArrayInputStream(message.toByteArray());
-                    message = null;
-
-                    if (extractor.shouldParseEmbedded(mailMetadata)) {
-                        extractor.parseEmbedded(messageStream, xhtml, mailMetadata, true);
-                    }
-
-                    if (tracking) {
-                        getTrackingMetadata().put(mailItem++, mailMetadata);
-                    }
-                } else {
-                    curLine = reader.readLine();
-                }
-
-            } while (curLine != null && !Thread.currentThread().isInterrupted());
-        }
-
-        xhtml.endDocument();
-    }
-
-    public boolean isTracking() {
-        return tracking;
-    }
-
-    public void setTracking(boolean tracking) {
-        this.tracking = tracking;
-    }
-
-    public Map<Integer, Metadata> getTrackingMetadata() {
-        return trackingMetadata;
-    }
-
-    private void saveHeaderInMetadata(Metadata metadata, String curLine) {
-        Matcher headerMatcher = EMAIL_HEADER_PATTERN.matcher(curLine);
-        if (!headerMatcher.matches()) {
-            return; // ignore malformed header lines
-        }
-
-        String headerTag = headerMatcher.group(1).toLowerCase(Locale.ROOT);
-        String headerContent = headerMatcher.group(2);
-
-        if (headerTag.equalsIgnoreCase("From")) {
-            metadata.set(TikaCoreProperties.CREATOR, headerContent);
-        } else if (headerTag.equalsIgnoreCase("To") || headerTag.equalsIgnoreCase("Cc")
-                || headerTag.equalsIgnoreCase("Bcc")) {
-            Matcher address = EMAIL_ADDRESS_PATTERN.matcher(headerContent);
-            if (address.find()) {
-                metadata.add(Metadata.MESSAGE_RECIPIENT_ADDRESS, address.group(1));
-            } else if (headerContent.indexOf('@') > -1) {
-                metadata.add(Metadata.MESSAGE_RECIPIENT_ADDRESS, headerContent);
-            }
-
-            String property = Metadata.MESSAGE_TO;
-            if (headerTag.equalsIgnoreCase("Cc")) {
-                property = Metadata.MESSAGE_CC;
-            } else if (headerTag.equalsIgnoreCase("Bcc")) {
-                property = Metadata.MESSAGE_BCC;
-            }
-            metadata.add(property, headerContent);
-        } else if (headerTag.equalsIgnoreCase("Subject")) {
-            metadata.add(Metadata.SUBJECT, headerContent);
-        } else if (headerTag.equalsIgnoreCase("Date")) {
-            try {
-                Date date = parseDate(headerContent);
-                metadata.set(TikaCoreProperties.CREATED, date);
-            } catch (ParseException e) {
-                // ignoring date because format was not understood
-            }
-        } else if (headerTag.equalsIgnoreCase("Message-Id")) {
-            metadata.set(TikaCoreProperties.IDENTIFIER, headerContent);
-        } else if (headerTag.equalsIgnoreCase("In-Reply-To")) {
-            metadata.set(TikaCoreProperties.RELATION, headerContent);
-        } else if (headerTag.equalsIgnoreCase("Content-Type")) {
-            // TODO - key off content-type in headers to
-            // set mapping to use for content and convert if necessary.
-
-            metadata.add(Metadata.CONTENT_TYPE, headerContent);
-            metadata.set(TikaCoreProperties.FORMAT, headerContent);
-        } else {
-            metadata.add(EMAIL_HEADER_METADATA_PREFIX + headerTag, headerContent);
-        }
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mbox;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Queue;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Mbox (mailbox) parser. This version extracts each mail from Mbox and uses the
+ * DelegatingParser to process each mail.
+ */
+public class MboxParser extends AbstractParser {
+
+    public static final String MBOX_MIME_TYPE = "application/mbox";
+    public static final String MBOX_RECORD_DIVIDER = "From ";
+    public static final int MAIL_MAX_SIZE = 50000000;
+    /**
+     * Serial version UID
+     */
+    private static final long serialVersionUID = -1762689436731160661L;
+    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.application("mbox"));
+    private static final Pattern EMAIL_HEADER_PATTERN = Pattern.compile("([^ ]+):[ \t]*(.*)");
+    private static final Pattern EMAIL_ADDRESS_PATTERN = Pattern.compile("<(.*@.*)>");
+
+    private static final String EMAIL_HEADER_METADATA_PREFIX = "MboxParser-";
+    private static final String EMAIL_FROMLINE_METADATA = EMAIL_HEADER_METADATA_PREFIX + "from";
+    private final Map<Integer, Metadata> trackingMetadata = new HashMap<Integer, Metadata>();
+    private boolean tracking = false;
+
+    public static Date parseDate(String headerContent) throws ParseException {
+        SimpleDateFormat dateFormat = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z", Locale.US);
+        return dateFormat.parse(headerContent);
+    }
+
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
+            throws IOException, TikaException, SAXException {
+
+        EmbeddedDocumentExtractor extractor = context.get(EmbeddedDocumentExtractor.class,
+                new ParsingEmbeddedDocumentExtractor(context));
+
+        String charsetName = "windows-1252";
+
+        metadata.set(Metadata.CONTENT_TYPE, MBOX_MIME_TYPE);
+        metadata.set(Metadata.CONTENT_ENCODING, charsetName);
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+
+        InputStreamReader isr = new InputStreamReader(stream, charsetName);
+        try (BufferedReader reader = new BufferedReader(isr)) {
+            String curLine = reader.readLine();
+            int mailItem = 0;
+            do {
+                if (curLine.startsWith(MBOX_RECORD_DIVIDER)) {
+                    Metadata mailMetadata = new Metadata();
+                    Queue<String> multiline = new LinkedList<String>();
+                    mailMetadata.add(EMAIL_FROMLINE_METADATA, curLine.substring(MBOX_RECORD_DIVIDER.length()));
+                    mailMetadata.set(Metadata.CONTENT_TYPE, "message/rfc822");
+                    curLine = reader.readLine();
+
+                    ByteArrayOutputStream message = new ByteArrayOutputStream(100000);
+                    do {
+                        if (curLine.startsWith(" ") || curLine.startsWith("\t")) {
+                            String latestLine = multiline.poll();
+                            latestLine += " " + curLine.trim();
+                            multiline.add(latestLine);
+                        } else {
+                            multiline.add(curLine);
+                        }
+
+                        message.write(curLine.getBytes(charsetName));
+                        message.write(0x0A);
+                        curLine = reader.readLine();
+                    }
+                    while (curLine != null && !curLine.startsWith(MBOX_RECORD_DIVIDER) && message.size() < MAIL_MAX_SIZE);
+
+                    for (String item : multiline) {
+                        saveHeaderInMetadata(mailMetadata, item);
+                    }
+
+                    ByteArrayInputStream messageStream = new ByteArrayInputStream(message.toByteArray());
+                    message = null;
+
+                    if (extractor.shouldParseEmbedded(mailMetadata)) {
+                        extractor.parseEmbedded(messageStream, xhtml, mailMetadata, true);
+                    }
+
+                    if (tracking) {
+                        getTrackingMetadata().put(mailItem++, mailMetadata);
+                    }
+                } else {
+                    curLine = reader.readLine();
+                }
+
+            } while (curLine != null && !Thread.currentThread().isInterrupted());
+        }
+
+        xhtml.endDocument();
+    }
+
+    public boolean isTracking() {
+        return tracking;
+    }
+
+    public void setTracking(boolean tracking) {
+        this.tracking = tracking;
+    }
+
+    public Map<Integer, Metadata> getTrackingMetadata() {
+        return trackingMetadata;
+    }
+
+    private void saveHeaderInMetadata(Metadata metadata, String curLine) {
+        Matcher headerMatcher = EMAIL_HEADER_PATTERN.matcher(curLine);
+        if (!headerMatcher.matches()) {
+            return; // ignore malformed header lines
+        }
+
+        String headerTag = headerMatcher.group(1).toLowerCase(Locale.ROOT);
+        String headerContent = headerMatcher.group(2);
+
+        if (headerTag.equalsIgnoreCase("From")) {
+            metadata.set(TikaCoreProperties.CREATOR, headerContent);
+        } else if (headerTag.equalsIgnoreCase("To") || headerTag.equalsIgnoreCase("Cc")
+                || headerTag.equalsIgnoreCase("Bcc")) {
+            Matcher address = EMAIL_ADDRESS_PATTERN.matcher(headerContent);
+            if (address.find()) {
+                metadata.add(Metadata.MESSAGE_RECIPIENT_ADDRESS, address.group(1));
+            } else if (headerContent.indexOf('@') > -1) {
+                metadata.add(Metadata.MESSAGE_RECIPIENT_ADDRESS, headerContent);
+            }
+
+            String property = Metadata.MESSAGE_TO;
+            if (headerTag.equalsIgnoreCase("Cc")) {
+                property = Metadata.MESSAGE_CC;
+            } else if (headerTag.equalsIgnoreCase("Bcc")) {
+                property = Metadata.MESSAGE_BCC;
+            }
+            metadata.add(property, headerContent);
+        } else if (headerTag.equalsIgnoreCase("Subject")) {
+            metadata.add(Metadata.SUBJECT, headerContent);
+        } else if (headerTag.equalsIgnoreCase("Date")) {
+            try {
+                Date date = parseDate(headerContent);
+                metadata.set(TikaCoreProperties.CREATED, date);
+            } catch (ParseException e) {
+                // ignoring date because format was not understood
+            }
+        } else if (headerTag.equalsIgnoreCase("Message-Id")) {
+            metadata.set(TikaCoreProperties.IDENTIFIER, headerContent);
+        } else if (headerTag.equalsIgnoreCase("In-Reply-To")) {
+            metadata.set(TikaCoreProperties.RELATION, headerContent);
+        } else if (headerTag.equalsIgnoreCase("Content-Type")) {
+            // TODO - key off content-type in headers to
+            // set mapping to use for content and convert if necessary.
+
+            metadata.add(Metadata.CONTENT_TYPE, headerContent);
+            metadata.set(TikaCoreProperties.FORMAT, headerContent);
+        } else {
+            metadata.add(EMAIL_HEADER_METADATA_PREFIX + headerTag, headerContent);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
index f7eec91..5883bd5 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
@@ -1,203 +1,203 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.mbox;
-
-import static java.lang.String.valueOf;
-import static java.nio.charset.StandardCharsets.UTF_8;
-import static java.util.Collections.singleton;
-
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Set;
-
-import com.pff.PSTAttachment;
-import com.pff.PSTFile;
-import com.pff.PSTFolder;
-import com.pff.PSTMessage;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.extractor.EmbeddedDocumentExtractor;
-import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
-import org.apache.tika.io.TemporaryResources;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.AttributesImpl;
-
-/**
- * Parser for MS Outlook PST email storage files
- */
-public class OutlookPSTParser extends AbstractParser {
-
-    private static final long serialVersionUID = 620998217748364063L;
-
-    public static final MediaType MS_OUTLOOK_PST_MIMETYPE = MediaType.application("vnd.ms-outlook-pst");
-    private static final Set<MediaType> SUPPORTED_TYPES = singleton(MS_OUTLOOK_PST_MIMETYPE);
-
-    private static AttributesImpl createAttribute(String attName, String attValue) {
-        AttributesImpl attributes = new AttributesImpl();
-        attributes.addAttribute("", attName, attName, "CDATA", attValue);
-        return attributes;
-    }
-
-    public Set<MediaType> getSupportedTypes(ParseContext context) {
-        return SUPPORTED_TYPES;
-    }
-
-    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
-            throws IOException, SAXException, TikaException {
-
-        // Use the delegate parser to parse the contained document
-        EmbeddedDocumentExtractor embeddedExtractor = context.get(EmbeddedDocumentExtractor.class,
-                new ParsingEmbeddedDocumentExtractor(context));
-
-        metadata.set(Metadata.CONTENT_TYPE, MS_OUTLOOK_PST_MIMETYPE.toString());
-
-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
-        xhtml.startDocument();
-
-        TikaInputStream in = TikaInputStream.get(stream);
-        PSTFile pstFile = null;
-        try {
-            pstFile = new PSTFile(in.getFile().getPath());
-            metadata.set(Metadata.CONTENT_LENGTH, valueOf(pstFile.getFileHandle().length()));
-            boolean isValid = pstFile.getFileHandle().getFD().valid();
-            metadata.set("isValid", valueOf(isValid));
-            if (isValid) {
-                parseFolder(xhtml, pstFile.getRootFolder(), embeddedExtractor);
-            }
-        } catch (Exception e) {
-            throw new TikaException(e.getMessage(), e);
-        } finally {
-            if (pstFile != null && pstFile.getFileHandle() != null) {
-                try {
-                    pstFile.getFileHandle().close();
-                } catch (IOException e) {
-                    //swallow closing exception
-                }
-            }
-        }
-
-        xhtml.endDocument();
-    }
-
-    private void parseFolder(XHTMLContentHandler handler, PSTFolder pstFolder, EmbeddedDocumentExtractor embeddedExtractor)
-            throws Exception {
-        if (pstFolder.getContentCount() > 0) {
-            PSTMessage pstMail = (PSTMessage) pstFolder.getNextChild();
-            while (pstMail != null) {
-                AttributesImpl attributes = new AttributesImpl();
-                attributes.addAttribute("", "class", "class", "CDATA", "embedded");
-                attributes.addAttribute("", "id", "id", "CDATA", pstMail.getInternetMessageId());
-                handler.startElement("div", attributes);
-                handler.element("h1", pstMail.getSubject());
-
-                parserMailItem(handler, pstMail, embeddedExtractor);
-                parseMailAttachments(handler, pstMail, embeddedExtractor);
-
-                handler.endElement("div");
-
-                pstMail = (PSTMessage) pstFolder.getNextChild();
-            }
-        }
-
-        if (pstFolder.hasSubfolders()) {
-            for (PSTFolder pstSubFolder : pstFolder.getSubFolders()) {
-                handler.startElement("div", createAttribute("class", "email-folder"));
-                handler.element("h1", pstSubFolder.getDisplayName());
-                parseFolder(handler, pstSubFolder, embeddedExtractor);
-                handler.endElement("div");
-            }
-        }
-    }
-
-    private void parserMailItem(XHTMLContentHandler handler, PSTMessage pstMail, EmbeddedDocumentExtractor embeddedExtractor) throws SAXException, IOException {
-        Metadata mailMetadata = new Metadata();
-        mailMetadata.set(Metadata.RESOURCE_NAME_KEY, pstMail.getInternetMessageId());
-        mailMetadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, pstMail.getInternetMessageId());
-        mailMetadata.set(TikaCoreProperties.IDENTIFIER, pstMail.getInternetMessageId());
-        mailMetadata.set(TikaCoreProperties.TITLE, pstMail.getSubject());
-        mailMetadata.set(Metadata.MESSAGE_FROM, pstMail.getSenderName());
-        mailMetadata.set(TikaCoreProperties.CREATOR, pstMail.getSenderName());
-        mailMetadata.set(TikaCoreProperties.CREATED, pstMail.getCreationTime());
-        mailMetadata.set(TikaCoreProperties.MODIFIED, pstMail.getLastModificationTime());
-        mailMetadata.set(TikaCoreProperties.COMMENTS, pstMail.getComment());
-        mailMetadata.set("descriptorNodeId", valueOf(pstMail.getDescriptorNodeId()));
-        mailMetadata.set("senderEmailAddress", pstMail.getSenderEmailAddress());
-        mailMetadata.set("recipients", pstMail.getRecipientsString());
-        mailMetadata.set("displayTo", pstMail.getDisplayTo());
-        mailMetadata.set("displayCC", pstMail.getDisplayCC());
-        mailMetadata.set("displayBCC", pstMail.getDisplayBCC());
-        mailMetadata.set("importance", valueOf(pstMail.getImportance()));
-        mailMetadata.set("priority", valueOf(pstMail.getPriority()));
-        mailMetadata.set("flagged", valueOf(pstMail.isFlagged()));
-
-        byte[] mailContent = pstMail.getBody().getBytes(UTF_8);
-        embeddedExtractor.parseEmbedded(new ByteArrayInputStream(mailContent), handler, mailMetadata, true);
-    }
-
-    private void parseMailAttachments(XHTMLContentHandler xhtml, PSTMessage email, EmbeddedDocumentExtractor embeddedExtractor)
-            throws TikaException {
-        int numberOfAttachments = email.getNumberOfAttachments();
-        for (int i = 0; i < numberOfAttachments; i++) {
-            File tempFile = null;
-            try {
-                PSTAttachment attach = email.getAttachment(i);
-
-                // Get the filename; both long and short filenames can be used for attachments
-                String filename = attach.getLongFilename();
-                if (filename.isEmpty()) {
-                    filename = attach.getFilename();
-                }
-
-                xhtml.element("p", filename);
-
-                Metadata attachMeta = new Metadata();
-                attachMeta.set(Metadata.RESOURCE_NAME_KEY, filename);
-                attachMeta.set(Metadata.EMBEDDED_RELATIONSHIP_ID, filename);
-                AttributesImpl attributes = new AttributesImpl();
-                attributes.addAttribute("", "class", "class", "CDATA", "embedded");
-                attributes.addAttribute("", "id", "id", "CDATA", filename);
-                xhtml.startElement("div", attributes);
-                if (embeddedExtractor.shouldParseEmbedded(attachMeta)) {
-                    TemporaryResources tmp = new TemporaryResources();
-                    try {
-                        TikaInputStream tis = TikaInputStream.get(attach.getFileInputStream(), tmp);
-                        embeddedExtractor.parseEmbedded(tis, xhtml, attachMeta, true);
-                    } finally {
-                        tmp.dispose();
-                    }
-                }
-                xhtml.endElement("div");
-
-            } catch (Exception e) {
-                throw new TikaException("Unable to unpack document stream", e);
-            } finally {
-                if (tempFile != null)
-                    tempFile.delete();
-            }
-        }
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mbox;
+
+import static java.lang.String.valueOf;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static java.util.Collections.singleton;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Set;
+
+import com.pff.PSTAttachment;
+import com.pff.PSTFile;
+import com.pff.PSTFolder;
+import com.pff.PSTMessage;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
+/**
+ * Parser for MS Outlook PST email storage files
+ */
+public class OutlookPSTParser extends AbstractParser {
+
+    private static final long serialVersionUID = 620998217748364063L;
+
+    public static final MediaType MS_OUTLOOK_PST_MIMETYPE = MediaType.application("vnd.ms-outlook-pst");
+    private static final Set<MediaType> SUPPORTED_TYPES = singleton(MS_OUTLOOK_PST_MIMETYPE);
+
+    private static AttributesImpl createAttribute(String attName, String attValue) {
+        AttributesImpl attributes = new AttributesImpl();
+        attributes.addAttribute("", attName, attName, "CDATA", attValue);
+        return attributes;
+    }
+
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+
+        // Use the delegate parser to parse the contained document
+        EmbeddedDocumentExtractor embeddedExtractor = context.get(EmbeddedDocumentExtractor.class,
+                new ParsingEmbeddedDocumentExtractor(context));
+
+        metadata.set(Metadata.CONTENT_TYPE, MS_OUTLOOK_PST_MIMETYPE.toString());
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+
+        TikaInputStream in = TikaInputStream.get(stream);
+        PSTFile pstFile = null;
+        try {
+            pstFile = new PSTFile(in.getFile().getPath());
+            metadata.set(Metadata.CONTENT_LENGTH, valueOf(pstFile.getFileHandle().length()));
+            boolean isValid = pstFile.getFileHandle().getFD().valid();
+            metadata.set("isValid", valueOf(isValid));
+            if (isValid) {
+                parseFolder(xhtml, pstFile.getRootFolder(), embeddedExtractor);
+            }
+        } catch (Exception e) {
+            throw new TikaException(e.getMessage(), e);
+        } finally {
+            if (pstFile != null && pstFile.getFileHandle() != null) {
+                try {
+                    pstFile.getFileHandle().close();
+                } catch (IOException e) {
+                    //swallow closing exception
+                }
+            }
+        }
+
+        xhtml.endDocument();
+    }
+
+    private void parseFolder(XHTMLContentHandler handler, PSTFolder pstFolder, EmbeddedDocumentExtractor embeddedExtractor)
+            throws Exception {
+        if (pstFolder.getContentCount() > 0) {
+            PSTMessage pstMail = (PSTMessage) pstFolder.getNextChild();
+            while (pstMail != null) {
+                AttributesImpl attributes = new AttributesImpl();
+                attributes.addAttribute("", "class", "class", "CDATA", "embedded");
+                attributes.addAttribute("", "id", "id", "CDATA", pstMail.getInternetMessageId());
+                handler.startElement("div", attributes);
+                handler.element("h1", pstMail.getSubject());
+
+                parserMailItem(handler, pstMail, embeddedExtractor);
+                parseMailAttachments(handler, pstMail, embeddedExtractor);
+
+                handler.endElement("div");
+
+                pstMail = (PSTMessage) pstFolder.getNextChild();
+            }
+        }
+
+        if (pstFolder.hasSubfolders()) {
+            for (PSTFolder pstSubFolder : pstFolder.getSubFolders()) {
+                handler.startElement("div", createAttribute("class", "email-folder"));
+                handler.element("h1", pstSubFolder.getDisplayName());
+                parseFolder(handler, pstSubFolder, embeddedExtractor);
+                handler.endElement("div");
+            }
+        }
+    }
+
+    private void parserMailItem(XHTMLContentHandler handler, PSTMessage pstMail, EmbeddedDocumentExtractor embeddedExtractor) throws SAXException, IOException {
+        Metadata mailMetadata = new Metadata();
+        mailMetadata.set(Metadata.RESOURCE_NAME_KEY, pstMail.getInternetMessageId());
+        mailMetadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, pstMail.getInternetMessageId());
+        mailMetadata.set(TikaCoreProperties.IDENTIFIER, pstMail.getInternetMessageId());
+        mailMetadata.set(TikaCoreProperties.TITLE, pstMail.getSubject());
+        mailMetadata.set(Metadata.MESSAGE_FROM, pstMail.getSenderName());
+        mailMetadata.set(TikaCoreProperties.CREATOR, pstMail.getSenderName());
+        mailMetadata.set(TikaCoreProperties.CREATED, pstMail.getCreationTime());
+        mailMetadata.set(TikaCoreProperties.MODIFIED, pstMail.getLastModificationTime());
+        mailMetadata.set(TikaCoreProperties.COMMENTS, pstMail.getComment());
+        mailMetadata.set("descriptorNodeId", valueOf(pstMail.getDescriptorNodeId()));
+        mailMetadata.set("senderEmailAddress", pstMail.getSenderEmailAddress());
+        mailMetadata.set("recipients", pstMail.getRecipientsString());
+        mailMetadata.set("displayTo", pstMail.getDisplayTo());
+        mailMetadata.set("displayCC", pstMail.getDisplayCC());
+        mailMetadata.set("displayBCC", pstMail.getDisplayBCC());
+        mailMetadata.set("importance", valueOf(pstMail.getImportance()));
+        mailMetadata.set("priority", valueOf(pstMail.getPriority()));
+        mailMetadata.set("flagged", valueOf(pstMail.isFlagged()));
+
+        byte[] mailContent = pstMail.getBody().getBytes(UTF_8);
+        embeddedExtractor.parseEmbedded(new ByteArrayInputStream(mailContent), handler, mailMetadata, true);
+    }
+
+    private void parseMailAttachments(XHTMLContentHandler xhtml, PSTMessage email, EmbeddedDocumentExtractor embeddedExtractor)
+            throws TikaException {
+        int numberOfAttachments = email.getNumberOfAttachments();
+        for (int i = 0; i < numberOfAttachments; i++) {
+            File tempFile = null;
+            try {
+                PSTAttachment attach = email.getAttachment(i);
+
+                // Get the filename; both long and short filenames can be used for attachments
+                String filename = attach.getLongFilename();
+                if (filename.isEmpty()) {
+                    filename = attach.getFilename();
+                }
+
+                xhtml.element("p", filename);
+
+                Metadata attachMeta = new Metadata();
+                attachMeta.set(Metadata.RESOURCE_NAME_KEY, filename);
+                attachMeta.set(Metadata.EMBEDDED_RELATIONSHIP_ID, filename);
+                AttributesImpl attributes = new AttributesImpl();
+                attributes.addAttribute("", "class", "class", "CDATA", "embedded");
+                attributes.addAttribute("", "id", "id", "CDATA", filename);
+                xhtml.startElement("div", attributes);
+                if (embeddedExtractor.shouldParseEmbedded(attachMeta)) {
+                    TemporaryResources tmp = new TemporaryResources();
+                    try {
+                        TikaInputStream tis = TikaInputStream.get(attach.getFileInputStream(), tmp);
+                        embeddedExtractor.parseEmbedded(tis, xhtml, attachMeta, true);
+                    } finally {
+                        tmp.dispose();
+                    }
+                }
+                xhtml.endElement("div");
+
+            } catch (Exception e) {
+                throw new TikaException("Unable to unpack document stream", e);
+            } finally {
+                if (tempFile != null)
+                    tempFile.delete();
+            }
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
index 36439b8..fa932a6 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
@@ -1,99 +1,99 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.odf;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.Locale;
-
-import org.apache.tika.sax.ContentHandlerDecorator;
-import org.xml.sax.Attributes;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.AttributesImpl;
-
-/**
- * Content handler decorator that:<ul>
- * <li>Maps old OpenOffice 1.0 Namespaces to the OpenDocument ones</li>
- * <li>Returns a fake DTD when parser requests OpenOffice DTD</li>
- * </ul>
- */
-public class NSNormalizerContentHandler extends ContentHandlerDecorator {
-
-    private static final String OLD_NS =
-            "http://openoffice.org/2000/";
-
-    private static final String NEW_NS =
-            "urn:oasis:names:tc:opendocument:xmlns:";
-
-    private static final String DTD_PUBLIC_ID =
-            "-//OpenOffice.org//DTD OfficeDocument 1.0//EN";
-
-    public NSNormalizerContentHandler(ContentHandler handler) {
-        super(handler);
-    }
-
-    private String mapOldNS(String ns) {
-        if (ns != null && ns.startsWith(OLD_NS)) {
-            return NEW_NS + ns.substring(OLD_NS.length()) + ":1.0";
-        } else {
-            return ns;
-        }
-    }
-
-    @Override
-    public void startElement(
-            String namespaceURI, String localName, String qName,
-            Attributes atts) throws SAXException {
-        AttributesImpl natts = new AttributesImpl();
-        for (int i = 0; i < atts.getLength(); i++) {
-            natts.addAttribute(
-                    mapOldNS(atts.getURI(i)), atts.getLocalName(i),
-                    atts.getQName(i), atts.getType(i), atts.getValue(i));
-        }
-        super.startElement(mapOldNS(namespaceURI), localName, qName, atts);
-    }
-
-    @Override
-    public void endElement(String namespaceURI, String localName, String qName)
-            throws SAXException {
-        super.endElement(mapOldNS(namespaceURI), localName, qName);
-    }
-
-    @Override
-    public void startPrefixMapping(String prefix, String uri)
-            throws SAXException {
-        super.startPrefixMapping(prefix, mapOldNS(uri));
-    }
-
-    /**
-     * do not load any DTDs (may be requested by parser). Fake the DTD by
-     * returning a empty string as InputSource
-     */
-    @Override
-    public InputSource resolveEntity(String publicId, String systemId)
-            throws IOException, SAXException {
-        if ((systemId != null && systemId.toLowerCase(Locale.ROOT).endsWith(".dtd"))
-                || DTD_PUBLIC_ID.equals(publicId)) {
-            return new InputSource(new StringReader(""));
-        } else {
-            return super.resolveEntity(publicId, systemId);
-        }
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.odf;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.Locale;
+
+import org.apache.tika.sax.ContentHandlerDecorator;
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
+/**
+ * Content handler decorator that:<ul>
+ * <li>Maps old OpenOffice 1.0 Namespaces to the OpenDocument ones</li>
+ * <li>Returns a fake DTD when parser requests OpenOffice DTD</li>
+ * </ul>
+ */
+public class NSNormalizerContentHandler extends ContentHandlerDecorator {
+
+    private static final String OLD_NS =
+            "http://openoffice.org/2000/";
+
+    private static final String NEW_NS =
+            "urn:oasis:names:tc:opendocument:xmlns:";
+
+    private static final String DTD_PUBLIC_ID =
+            "-//OpenOffice.org//DTD OfficeDocument 1.0//EN";
+
+    public NSNormalizerContentHandler(ContentHandler handler) {
+        super(handler);
+    }
+
+    private String mapOldNS(String ns) {
+        if (ns != null && ns.startsWith(OLD_NS)) {
+            return NEW_NS + ns.substring(OLD_NS.length()) + ":1.0";
+        } else {
+            return ns;
+        }
+    }
+
+    @Override
+    public void startElement(
+            String namespaceURI, String localName, String qName,
+            Attributes atts) throws SAXException {
+        AttributesImpl natts = new AttributesImpl();
+        for (int i = 0; i < atts.getLength(); i++) {
+            natts.addAttribute(
+                    mapOldNS(atts.getURI(i)), atts.getLocalName(i),
+                    atts.getQName(i), atts.getType(i), atts.getValue(i));
+        }
+        super.startElement(mapOldNS(namespaceURI), localName, qName, atts);
+    }
+
+    @Override
+    public void endElement(String namespaceURI, String localName, String qName)
+            throws SAXException {
+        super.endElement(mapOldNS(namespaceURI), localName, qName);
+    }
+
+    @Override
+    public void startPrefixMapping(String prefix, String uri)
+            throws SAXException {
+        super.startPrefixMapping(prefix, mapOldNS(uri));
+    }
+
+    /**
+     * do not load any DTDs (may be requested by parser). Fake the DTD by
+     * returning a empty string as InputSource
+     */
+    @Override
+    public InputSource resolveEntity(String publicId, String systemId)
+            throws IOException, SAXException {
+        if ((systemId != null && systemId.toLowerCase(Locale.ROOT).endsWith(".dtd"))
+                || DTD_PUBLIC_ID.equals(publicId)) {
+            return new InputSource(new StringReader(""));
+        } else {
+            return super.resolveEntity(publicId, systemId);
+        }
+    }
+
+}