You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ms...@apache.org on 2016/08/15 19:31:59 UTC

svn commit: r1756416 - in /pdfbox/branches/2.0/pdfbox/src: main/java/org/apache/pdfbox/pdmodel/interactive/form/ test/java/org/apache/pdfbox/pdmodel/interactive/form/

Author: msahyoun
Date: Mon Aug 15 19:31:58 2016
New Revision: 1756416

URL: http://svn.apache.org/viewvc?rev=1756416&view=rev
Log:
PDFBOX-3461: add handling for empty paragraphs; enhance unit tests; compare streams

Added:
    pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PlainTextTest.java
      - copied unchanged from r1756411, pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PlainTextTest.java
Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java
    pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java?rev=1756416&r1=1756415&r2=1756416&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PlainText.java Mon Aug 15 19:31:58 2016
@@ -55,6 +55,11 @@ class PlainText
         paragraphs = new ArrayList<Paragraph>();
         for (String part : parts)
         {
+        	// Acrobat prints a space for an empty paragraph
+        	if (part.length() == 0)
+        	{
+        		part = " ";
+        	}
             paragraphs.add(new Paragraph(part));
         }
     }

Modified: pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java?rev=1756416&r1=1756415&r2=1756416&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/ControlCharacterTest.java Mon Aug 15 19:31:58 2016
@@ -16,10 +16,22 @@
  */
 package org.apache.pdfbox.pdmodel.interactive.form;
 
+import static org.junit.Assert.assertEquals;
+
 import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Deque;
+import java.util.List;
 
+import org.apache.pdfbox.cos.COSString;
+import org.apache.pdfbox.pdfparser.PDFStreamParser;
 import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -78,45 +90,85 @@ public class ControlCharacterTest {
     @Test
     public void characterSPACE() throws IOException
     {
-    	acroForm.getField("pdfbox-space").setValue("SPACE SPACE");
+    	PDField field = acroForm.getField("pdfbox-space");
+    	field.setValue("SPACE SPACE");
+
+    	List<String> pdfboxValues = getStringsFromStream(field);
+    	List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-space"));
+
+    	assertEquals(pdfboxValues, acrobatValues);
     }
 
     @Test
     public void characterCR() throws IOException
     {
-    	acroForm.getField("pdfbox-cr").setValue("CR\rCR");
+    	PDField field = acroForm.getField("pdfbox-cr");
+    	field.setValue("CR\rCR");
+
+    	List<String> pdfboxValues = getStringsFromStream(field);
+    	List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-cr"));
+
+    	assertEquals(pdfboxValues, acrobatValues);
     }
 
     @Test
     public void characterLF() throws IOException
     {
-    	acroForm.getField("pdfbox-lf").setValue("LF\nLF");
+    	PDField field = acroForm.getField("pdfbox-lf");
+    	field.setValue("LF\nLF");
+
+    	List<String> pdfboxValues = getStringsFromStream(field);
+    	List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-lf"));
+
+    	assertEquals(pdfboxValues, acrobatValues);
     }
     
     @Test
     public void characterCRLF() throws IOException
     {
-    	acroForm.getField("pdfbox-crlf").setValue("CRLF\r\nCRLF");
+    	PDField field = acroForm.getField("pdfbox-crlf");
+    	field.setValue("CRLF\r\nCRLF");
+
+    	List<String> pdfboxValues = getStringsFromStream(field);
+    	List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-crlf"));
+
+    	assertEquals(pdfboxValues, acrobatValues);
     }
 
     @Test
     public void characterLFCR() throws IOException
     {
-    	acroForm.getField("pdfbox-lfcr").setValue("LFCR\r\nLFCR");
+    	PDField field = acroForm.getField("pdfbox-lfcr");
+    	field.setValue("LFCR\n\rLFCR");
+    	
+    	List<String> pdfboxValues = getStringsFromStream(field);
+    	List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-lfcr"));
+
+    	assertEquals(pdfboxValues, acrobatValues);
     }
     
     @Test
     public void characterUnicodeLinebreak() throws IOException
     {
-    	acroForm.getField("pdfbox-linebreak").setValue("linebreak\u2028linebreak");
+    	PDField field = acroForm.getField("pdfbox-linebreak");
+    	field.setValue("linebreak\u2028linebreak");
     	
+    	List<String> pdfboxValues = getStringsFromStream(field);
+    	List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-linebreak"));
+
+    	assertEquals(pdfboxValues, acrobatValues);
     }
     
     @Test
     public void characterUnicodeParagraphbreak() throws IOException
     {
-    	acroForm.getField("pdfbox-paragraphbreak").setValue("paragraphbreak\u2029paragraphbreak");
+    	PDField field = acroForm.getField("pdfbox-paragraphbreak");
+    	field.setValue("paragraphbreak\u2029paragraphbreak");
     	
+    	List<String> pdfboxValues = getStringsFromStream(field);
+    	List<String> acrobatValues = getStringsFromStream(acroForm.getField("acrobat-paragraphbreak"));
+
+    	assertEquals(pdfboxValues, acrobatValues);
     }
     
     @After
@@ -124,4 +176,27 @@ public class ControlCharacterTest {
     {
         document.close();
     }
+    
+    private List<String> getStringsFromStream(PDField field) throws IOException
+    {
+    	PDAnnotationWidget widget = field.getWidgets().get(0);
+    	PDFStreamParser parser = new PDFStreamParser(widget.getNormalAppearanceStream());
+    	
+    	Object token = parser.parseNextToken();
+    	
+    	List<String> stringValues = new ArrayList<String>();
+    	
+    	while (token != null)
+    	{
+    		if (token instanceof COSString)
+    		{
+    			// TODO: improve the string output to better match Acrobat.
+    			// For now the values are trimmed, because Acrobat adds
+    			// spaces to strings where we don't.
+    			stringValues.add(((COSString) token).getString().trim());
+    		}
+    		token = parser.parseNextToken();
+    	}
+    	return stringValues;   	
+    }
 }