You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ye...@apache.org on 2009/06/19 15:45:56 UTC

svn commit: r786505 - in /poi/trunk/src: documentation/content/xdocs/ scratchpad/src/org/apache/poi/hwpf/model/ scratchpad/testcases/org/apache/poi/hwpf/data/ scratchpad/testcases/org/apache/poi/hwpf/usermodel/

Author: yegor
Date: Fri Jun 19 13:45:55 2009
New Revision: 786505

URL: http://svn.apache.org/viewvc?rev=786505&view=rev
Log:
improved HWPF to better handle unicode, patch provided by Benjamin Engele and Maxim Valyanskiy, see Bugzilla #46610

Added:
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java   (with props)
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_1.doc   (with props)
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_2.doc   (with props)
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_3.doc   (with props)
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBug46610.java   (with props)
Modified:
    poi/trunk/src/documentation/content/xdocs/status.xml
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java

Modified: poi/trunk/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/trunk/src/documentation/content/xdocs/status.xml?rev=786505&r1=786504&r2=786505&view=diff
==============================================================================
--- poi/trunk/src/documentation/content/xdocs/status.xml (original)
+++ poi/trunk/src/documentation/content/xdocs/status.xml Fri Jun 19 13:45:55 2009
@@ -33,6 +33,7 @@
 
     <changes>
         <release version="3.5-beta7" date="2009-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">46610 - Improved HWPF to better handle unicode</action>
            <action dev="POI-DEVELOPERS" type="fix">47261 - Fixed SlideShow#removeSlide to remove references to Notes</action>
            <action dev="POI-DEVELOPERS" type="fix">47375 - Fixed HSSFHyperlink to correctly set inter-sheet and file links</action>
            <action dev="POI-DEVELOPERS" type="fix">47384 - Fixed ExternalNameRecord to handle unicode names</action>

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java?rev=786505&r1=786504&r2=786505&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java Fri Jun 19 13:45:55 2009
@@ -25,37 +25,28 @@
  *  and characters.
  */
 public abstract class BytePropertyNode extends PropertyNode {
-	private boolean isUnicode;
+        private final int startBytes;
+        private final int endBytes;
 
 	/**
 	 * @param fcStart The start of the text for this property, in _bytes_
 	 * @param fcEnd The end of the text for this property, in _bytes_
 	 */
-	public BytePropertyNode(int fcStart, int fcEnd, Object buf, boolean isUnicode) {
+	public BytePropertyNode(int fcStart, int fcEnd, CharIndexTranslator translator, Object buf) {
 		super(
-				generateCp(fcStart, isUnicode),
-				generateCp(fcEnd, isUnicode),
+				translator.getCharIndex(fcStart),
+				translator.getCharIndex(fcEnd),
 				buf
 		);
-		this.isUnicode = isUnicode;
-	}
-	private static int generateCp(int val, boolean isUnicode) {
-		if(isUnicode)
-			return val/2;
-		return val;
+                this.startBytes = fcStart;
+                this.endBytes = fcEnd;
 	}
 
-	public boolean isUnicode() {
-		return isUnicode;
-	}
 	public int getStartBytes() {
-		if(isUnicode)
-			return getStart()*2;
-		return getStart();
+                return startBytes;
 	}
+
 	public int getEndBytes() {
-		if(isUnicode)
-			return getEnd()*2;
-		return getEnd();
+                return endBytes;
 	}
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java?rev=786505&r1=786504&r2=786505&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java Fri Jun 19 13:45:55 2009
@@ -119,9 +119,8 @@
 
   public void insert(int listIndex, int cpStart, SprmBuffer buf)
   {
-	boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart);
 
-    CHPX insertChpx = new CHPX(0, 0, buf, needsToBeUnicode);
+    CHPX insertChpx = new CHPX(0, 0, tpt,buf);
 
     // Ensure character offsets are really characters
     insertChpx.setStart(cpStart);
@@ -141,7 +140,7 @@
     	//  Original, until insert at point
     	//  New one
     	//  Clone of original, on to the old end
-        CHPX clone = new CHPX(0, 0, chpx.getSprmBuf(), needsToBeUnicode);
+        CHPX clone = new CHPX(0, 0, tpt,chpx.getSprmBuf());
         // Again ensure contains character based offsets no matter what
         clone.setStart(cpStart);
         clone.setEnd(chpx.getEnd());

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java?rev=786505&r1=786504&r2=786505&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java Fri Jun 19 13:45:55 2009
@@ -60,8 +60,9 @@
 
       for (int x = 0; x < _crun; x++)
       {
-    	boolean isUnicode = tpt.isUnicodeAtByteOffset( getStart(x) );
-        _chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), isUnicode));
+    	int startAt = getStart(x);
+		int endAt = getEnd(x);
+		_chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x)));
       }
     }
 

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java?rev=786505&r1=786504&r2=786505&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java Fri Jun 19 13:45:55 2009
@@ -34,14 +34,14 @@
 public final class CHPX extends BytePropertyNode
 {
 
-  public CHPX(int fcStart, int fcEnd, byte[] grpprl, boolean isUnicode)
+  public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] grpprl)
   {
-    super(fcStart, fcEnd, new SprmBuffer(grpprl), isUnicode);
+    super(fcStart, fcEnd, translator, new SprmBuffer(grpprl));
   }
 
-  public CHPX(int fcStart, int fcEnd, SprmBuffer buf, boolean isUnicode)
+  public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf)
   {
-    super(fcStart, fcEnd, buf, isUnicode);
+    super(fcStart, fcEnd, translator ,buf);
   }
 
 

Added: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java?rev=786505&view=auto
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java (added)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java Fri Jun 19 13:45:55 2009
@@ -0,0 +1,40 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hwpf.model;
+
+public interface CharIndexTranslator {
+
+    /**
+     * Calculates the char index of the given byte index.
+     *
+     * @param bytePos The byte offset to translate
+     * @return the char index
+     */
+    int getCharIndex(int bytePos);
+
+    /**
+     * Is the text at the given byte offset unicode, or plain old ascii? In a
+     * very evil fashion, you have to actually know this to make sense of
+     * character and paragraph properties :(
+     *
+     * @param bytePos The byte offset to check
+     * @return true if the text at the given byte offset is unicode
+     */
+    boolean isUnicodeAtByteOffset(int bytePos);
+
+}

Propchange: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java
------------------------------------------------------------------------------
    svn:executable = *

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java?rev=786505&r1=786504&r2=786505&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java Fri Jun 19 13:45:55 2009
@@ -76,9 +76,8 @@
 
   public void insert(int listIndex, int cpStart, SprmBuffer buf)
   {
-    boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart);
 
-    PAPX forInsert = new PAPX(0, 0, buf, _dataStream, needsToBeUnicode);
+    PAPX forInsert = new PAPX(0, 0, tpt, buf, _dataStream);
 
     // Ensure character offsets are really characters
     forInsert.setStart(cpStart);
@@ -108,7 +107,7 @@
     	//  Original, until insert at point
     	//  New one
     	//  Clone of original, on to the old end
-        PAPX clone = new PAPX(0, 0, clonedBuf, _dataStream, needsToBeUnicode);
+        PAPX clone = new PAPX(0, 0, tpt, clonedBuf, _dataStream);
         // Again ensure contains character based offsets no matter what
         clone.setStart(cpStart);
         clone.setEnd(currentPap.getEnd());

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java?rev=786505&r1=786504&r2=786505&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java Fri Jun 19 13:45:55 2009
@@ -62,14 +62,10 @@
     public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt)
     {
       super(documentStream, offset);
-
       for (int x = 0; x < _crun; x++) {
-         int startAt = getStart(x) - fcMin;
-         int endAt = getEnd(x) - fcMin;
-    	 boolean isUnicode = tpt.isUnicodeAtByteOffset(startAt);
-         //System.err.println(startAt + " -> " + endAt + " = " + isUnicode);
-
-         _papxList.add(new PAPX(startAt, endAt, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode));
+         int startAt = getStart(x);
+         int endAt = getEnd(x);
+         _papxList.add(new PAPX(startAt, endAt, tpt, getGrpprl(x), getParagraphHeight(x), dataStream));
       }
       _fkp = null;
       _dataStream = dataStream;

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java?rev=786505&r1=786504&r2=786505&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java Fri Jun 19 13:45:55 2009
@@ -40,18 +40,18 @@
   private ParagraphHeight _phe;
   private int _hugeGrpprlOffset = -1;
 
-  public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe, byte[] dataStream, boolean isUnicode)
+  public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] papx, ParagraphHeight phe, byte[] dataStream)
   {
-    super(fcStart, fcEnd, new SprmBuffer(papx), isUnicode);
+    super(fcStart, fcEnd, translator, new SprmBuffer(papx));
     _phe = phe;
     SprmBuffer buf = findHuge(new SprmBuffer(papx), dataStream);
     if(buf != null)
       _buf = buf;
   }
 
-  public PAPX(int fcStart, int fcEnd, SprmBuffer buf, byte[] dataStream, boolean isUnicode)
+  public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf, byte[] dataStream)
   {
-    super(fcStart, fcEnd, buf, isUnicode);
+    super(fcStart, fcEnd, translator, buf);
     _phe = new ParagraphHeight();
     buf = findHuge(buf, dataStream);
     if(buf != null)

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java?rev=786505&r1=786504&r2=786505&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java Fri Jun 19 13:45:55 2009
@@ -28,9 +28,9 @@
 
   SectionDescriptor _sed;
 
-  public SEPX(SectionDescriptor sed, int start, int end, byte[] grpprl, boolean isUnicode)
+  public SEPX(SectionDescriptor sed, int start, int end, CharIndexTranslator translator, byte[] grpprl)
   {
-    super(start, end, SectionSprmUncompressor.uncompressSEP(grpprl, 0), isUnicode);
+    super(start, end, translator, SectionSprmUncompressor.uncompressSEP(grpprl, 0));
     _sed = sed;
   }
 

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java?rev=786505&r1=786504&r2=786505&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java Fri Jun 19 13:45:55 2009
@@ -61,13 +61,10 @@
       int startAt = CPtoFC(node.getStart());
       int endAt = CPtoFC(node.getEnd());
 
-      boolean isUnicodeAtStart = tpt.isUnicodeAtByteOffset( startAt );
-//      System.err.println(startAt + " -> " + endAt + " = " + isUnicodeAtStart);
-
       // check for the optimization
       if (fileOffset == 0xffffffff)
       {
-        _sections.add(new SEPX(sed, startAt, endAt, new byte[0], isUnicodeAtStart));
+        _sections.add(new SEPX(sed, startAt, endAt, tpt, new byte[0]));
       }
       else
       {
@@ -76,7 +73,7 @@
         byte[] buf = new byte[sepxSize];
         fileOffset += LittleEndian.SHORT_SIZE;
         System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
-        _sections.add(new SEPX(sed, startAt, endAt, buf, isUnicodeAtStart));
+        _sections.add(new SEPX(sed, startAt, endAt, tpt, buf));
       }
     }
 
@@ -138,33 +135,13 @@
       }
       int FC = TP.getPieceDescriptor().getFilePosition();
       int offset = CP - TP.getCP();
-      FC = FC+offset-((TextPiece)_text.get(0)).getPieceDescriptor().getFilePosition();
+      if (TP.isUnicode()) {
+        offset = offset*2;
+      }
+      FC = FC+offset;
       return FC;
     }
 
-    // Ryans code
-    private int FCtoCP(int fc)
-   {
-     int size = _text.size();
-     int cp = 0;
-     for (int x = 0; x < size; x++)
-     {
-       TextPiece piece = (TextPiece)_text.get(x);
-
-       if (fc <= piece.getEnd())
-       {
-         cp += (fc - piece.getStart());
-         break;
-       }
-       else
-       {
-         cp += (piece.getEnd() - piece.getStart());
-       }
-     }
-     return cp;
-   }
-
-
   public ArrayList getSections()
   {
     return _sections;
@@ -205,7 +182,7 @@
 
       // Line using Ryan's FCtoCP() conversion method -
       // unable to observe any effect on our testcases when using this code - piers
-      GenericPropertyNode property = new GenericPropertyNode(FCtoCP(sepx.getStartBytes()), FCtoCP(sepx.getEndBytes()), sed.toByteArray());
+      GenericPropertyNode property = new GenericPropertyNode(tpt.getCharIndex(sepx.getStartBytes()), tpt.getCharIndex(sepx.getEndBytes()), sed.toByteArray());
 
 
       plex.addProperty(property);

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java?rev=786505&r1=786504&r2=786505&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java Fri Jun 19 13:45:55 2009
@@ -37,7 +37,7 @@
  *  convertion.
  * @author Ryan Ackley
  */
-public final class TextPieceTable
+public final class TextPieceTable implements CharIndexTranslator
 {
   protected ArrayList _textPieces = new ArrayList();
   //int _multiple;
@@ -150,31 +150,25 @@
 	  // If they ask off the end, just go with the last one...
 	  return lastWas;
   }
-  /**
-   * Is the text at the given byte offset
-   *  unicode, or plain old ascii?
-   * In a very evil fashion, you have to actually
-   *  know this to make sense of character and
-   *  paragraph properties :(
-   * @param bytePos The character offset to check about
-   */
+
   public boolean isUnicodeAtByteOffset(int bytePos) {
 	  boolean lastWas = false;
-	  int curByte = 0;
+	 
 
 	  Iterator it = _textPieces.iterator();
 	  while(it.hasNext()) {
 		  TextPiece tp = (TextPiece)it.next();
-		  int nextByte = curByte + tp.bytesLength();
+		  int curByte = tp.getPieceDescriptor().getFilePosition();
+		  int pieceEnd = curByte + tp.bytesLength();
 
 		  // If the text piece covers the character, all good
-		  if(curByte <= bytePos && nextByte >= bytePos) {
+		  if(curByte <= bytePos && pieceEnd > bytePos) {
 			  return tp.isUnicode();
 		  }
 		  // Otherwise keep track for the last one
 		  lastWas = tp.isUnicode();
 		  // Move along
-		  curByte = nextByte;
+		  curByte = pieceEnd;
 	  }
 
 	  // If they ask off the end, just go with the last one...
@@ -268,4 +262,34 @@
     }
     return false;
   }
+  	/* (non-Javadoc)
+	 * @see org.apache.poi.hwpf.model.CharIndexTranslator#getCharIndex(int)
+	 */
+	public int getCharIndex(int bytePos) {
+		int charCount = 0;
+
+		Iterator it = _textPieces.iterator();
+		while (it.hasNext()) {
+			TextPiece tp = (TextPiece) it.next();
+			int pieceStart = tp.getPieceDescriptor().getFilePosition();
+			if(pieceStart >= bytePos) {
+				break;
+			}
+			
+			int bytesLength = tp.bytesLength();
+			int pieceEnd = pieceStart + bytesLength;
+
+			int toAdd = bytePos > pieceEnd ? bytesLength : bytesLength
+					- (pieceEnd - bytePos);
+
+			if (tp.isUnicode()) {
+				charCount += toAdd / 2;
+			} else {
+				charCount += toAdd;
+			}
+		}
+
+		return charCount;
+	}
+	
 }

Added: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_1.doc
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_1.doc?rev=786505&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_1.doc
------------------------------------------------------------------------------
    svn:executable = *

Propchange: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_1.doc
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_2.doc
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_2.doc?rev=786505&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_2.doc
------------------------------------------------------------------------------
    svn:executable = *

Propchange: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_2.doc
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_3.doc
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_3.doc?rev=786505&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_3.doc
------------------------------------------------------------------------------
    svn:executable = *

Propchange: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_3.doc
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBug46610.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBug46610.java?rev=786505&view=auto
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBug46610.java (added)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBug46610.java Fri Jun 19 13:45:55 2009
@@ -0,0 +1,72 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hwpf.usermodel;
+
+import junit.framework.TestCase;
+
+import java.io.FileInputStream;
+
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Range;
+import org.apache.poi.hwpf.HWPFDocument;
+
+public class TestBug46610 extends TestCase {
+  private String dirname;
+
+  protected void setUp() throws Exception {
+    dirname = System.getProperty("HWPF.testdata.path");
+  }
+
+  public void testUtf() throws Exception {
+    HWPFDocument doc = new HWPFDocument(new FileInputStream(dirname + "/Bug46610_1.doc"));
+
+    runExtract(doc);
+  }
+
+  public void testUtf2() throws Exception {
+    HWPFDocument doc = new HWPFDocument(new FileInputStream(dirname + "/Bug46610_2.doc"));
+
+    runExtract(doc);
+  }
+
+  public void testExtraction() throws Exception {
+    HWPFDocument doc = new HWPFDocument(new FileInputStream(dirname + "/Bug46610_3.doc"));
+
+    String text = runExtract(doc);
+
+    assertTrue(text.contains("\u0421\u0412\u041e\u042e"));
+  }
+
+  private String runExtract(HWPFDocument doc) {
+    StringBuffer out = new StringBuffer();
+
+    Range globalRange = doc.getRange();
+    for (int i = 0; i < globalRange.numParagraphs(); i++) {
+      Paragraph p = globalRange.getParagraph(i);
+      out.append(p.text());
+      out.append("\n");
+      for (int j = 0; j < p.numCharacterRuns(); j++) {
+        CharacterRun characterRun = p.getCharacterRun(j);
+        characterRun.text();
+      }
+    }
+
+    return out.toString();
+  }
+}

Propchange: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBug46610.java
------------------------------------------------------------------------------
    svn:executable = *



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org