You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ma...@apache.org on 2010/08/04 14:43:59 UTC

svn commit: r982238 - in /poi/trunk: src/scratchpad/src/org/apache/poi/hwpf/usermodel/ src/scratchpad/testcases/org/apache/poi/hwpf/extractor/ test-data/document/

Author: maxcom
Date: Wed Aug  4 12:43:58 2010
New Revision: 982238

URL: http://svn.apache.org/viewvc?rev=982238&view=rev
Log:
hwpf: include any character data that precedes the first paragraph in that
paragraph's range (workaround for bug #48075)

Added:
    poi/trunk/test-data/document/MBD001D0B89.doc   (with props)
Modified:
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java?rev=982238&r1=982237&r2=982238&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java Wed Aug  4 12:43:58 2010
@@ -101,6 +101,14 @@ public class Paragraph extends Range imp
     _istd = papx.getIstd();
   }
 
+  protected Paragraph(PAPX papx, Range parent, int start)
+  {
+    super(Math.max(parent._start, start), Math.min(parent._end, papx.getEnd()), parent);
+    _props = papx.getParagraphProperties(_doc.getStyleSheet());
+    _papx = papx.getSprmBuf();
+    _istd = papx.getIstd();
+  }
+
   public short getStyleIndex()
   {
     return _istd;

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java?rev=982238&r1=982237&r2=982238&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java Wed Aug  4 12:43:58 2010
@@ -830,7 +830,11 @@ public class Range { // TODO -instantiab
 		if (props.getIlfo() > 0) {
 			pap = new ListEntry(papx, this, _doc.getListTables());
 		} else {
-			pap = new Paragraph(papx, this);
+            if (((index + _parStart)==0) && papx.getStart()>0) {
+                pap = new Paragraph(papx, this, 0);
+            } else {
+    			pap = new Paragraph(papx, this);
+            }
 		}
 
 		return pap;

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java?rev=982238&r1=982237&r2=982238&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractor.java Wed Aug  4 12:43:58 2010
@@ -298,4 +298,14 @@ public final class TestWordExtractor ext
         assertTrue(text.contains("\u0425\u0425\u0425\u0425\u0425"));
         assertTrue(text.contains("\u0423\u0423\u0423\u0423\u0423"));
     }
+
+    public void testFirstParagraphFix() throws Exception {
+        extractor = new WordExtractor(
+                POIDataSamples.getDocumentInstance().openResourceAsStream("MBD001D0B89.doc")
+        );
+
+        String text = extractor.getText();
+
+        assertTrue(text.startsWith("\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435"));
+    }
 }

Added: poi/trunk/test-data/document/MBD001D0B89.doc
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/document/MBD001D0B89.doc?rev=982238&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/document/MBD001D0B89.doc
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org