You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by re...@apache.org on 2013/08/24 18:42:09 UTC

svn commit: r1517166 - in /uima/sandbox/uimafit/trunk: uimafit-core/src/test/java/org/apache/uima/fit/testing/factory/TokenBuilderTest.java uimafit-legacy-support/src/test/java/org/apache/uima/fit/testing/factory/TokenBuilderTest.java

Author: rec
Date: Sat Aug 24 16:42:08 2013
New Revision: 1517166

URL: http://svn.apache.org/r1517166
Log:
[UIMA-3201] Test files for different line-ending styles use svn:eol-style native
- Added additional tests to check for desired line endings

Modified:
    uima/sandbox/uimafit/trunk/uimafit-core/src/test/java/org/apache/uima/fit/testing/factory/TokenBuilderTest.java
    uima/sandbox/uimafit/trunk/uimafit-legacy-support/src/test/java/org/apache/uima/fit/testing/factory/TokenBuilderTest.java

Modified: uima/sandbox/uimafit/trunk/uimafit-core/src/test/java/org/apache/uima/fit/testing/factory/TokenBuilderTest.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uimafit/trunk/uimafit-core/src/test/java/org/apache/uima/fit/testing/factory/TokenBuilderTest.java?rev=1517166&r1=1517165&r2=1517166&view=diff
==============================================================================
--- uima/sandbox/uimafit/trunk/uimafit-core/src/test/java/org/apache/uima/fit/testing/factory/TokenBuilderTest.java (original)
+++ uima/sandbox/uimafit/trunk/uimafit-core/src/test/java/org/apache/uima/fit/testing/factory/TokenBuilderTest.java Sat Aug 24 16:42:08 2013
@@ -22,9 +22,11 @@ import static org.junit.Assert.assertEqu
 import static org.junit.Assert.assertNotNull;
 
 import java.io.File;
+import java.io.FileInputStream;
 import java.util.Collection;
 import java.util.Iterator;
 
+import org.apache.commons.io.IOUtils;
 import org.apache.uima.UIMAException;
 import org.apache.uima.cas.FSIndex;
 import org.apache.uima.cas.FSIterator;
@@ -198,8 +200,14 @@ public class TokenBuilderTest extends Co
 
   @Test
   public void testNewlinesFromFile() throws Exception {
-    String text = FileUtil.loadTextFile(new File("src/test/resources/data/docs/unix-newlines.txt.bin"),
-            "UTF-8");
+    File unixNewlines = new File("src/test/resources/data/docs/unix-newlines.txt.bin");
+    assertEquals(55, unixNewlines.length());
+    byte[] unixNewlinesBytes = IOUtils.toByteArray(new FileInputStream(unixNewlines));
+    assertEquals('.', unixNewlinesBytes[13]);
+    assertEquals(0x0A, unixNewlinesBytes[14]);
+    assertEquals('s', unixNewlinesBytes[15]);
+    
+    String text = FileUtil.loadTextFile(unixNewlines, "UTF-8");
     text = text.substring(1); // remove "\uFEFF" character from beginning of text
     tokenBuilder.buildTokens(jCas, text);
 
@@ -212,8 +220,14 @@ public class TokenBuilderTest extends Co
     assertEquals("sentence 4.", iterator.next().getCoveredText());
 
     jCas.reset();
-    text = FileUtil.loadTextFile(new File("src/test/resources/data/docs/windows-newlines.txt.bin"),
-            "UTF-8");
+    File windowsNewlines = new File("src/test/resources/data/docs/windows-newlines.txt.bin");
+    text = FileUtil.loadTextFile(windowsNewlines, "UTF-8");
+    assertEquals(65, windowsNewlines.length());
+    byte[] windowsNewlinesBytes = IOUtils.toByteArray(new FileInputStream(windowsNewlines));
+    assertEquals('.', windowsNewlinesBytes[13]);
+    assertEquals(0x0D, windowsNewlinesBytes[14]);
+    assertEquals(0x0A, windowsNewlinesBytes[15]);
+    assertEquals('s', windowsNewlinesBytes[16]);
     text = text.substring(1); // remove "\uFEFF" character from beginning of text
     tokenBuilder.buildTokens(jCas, text);
 

Modified: uima/sandbox/uimafit/trunk/uimafit-legacy-support/src/test/java/org/apache/uima/fit/testing/factory/TokenBuilderTest.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uimafit/trunk/uimafit-legacy-support/src/test/java/org/apache/uima/fit/testing/factory/TokenBuilderTest.java?rev=1517166&r1=1517165&r2=1517166&view=diff
==============================================================================
--- uima/sandbox/uimafit/trunk/uimafit-legacy-support/src/test/java/org/apache/uima/fit/testing/factory/TokenBuilderTest.java (original)
+++ uima/sandbox/uimafit/trunk/uimafit-legacy-support/src/test/java/org/apache/uima/fit/testing/factory/TokenBuilderTest.java Sat Aug 24 16:42:08 2013
@@ -22,9 +22,11 @@ import static org.junit.Assert.assertEqu
 import static org.junit.Assert.assertNotNull;
 
 import java.io.File;
+import java.io.FileInputStream;
 import java.util.Collection;
 import java.util.Iterator;
 
+import org.apache.commons.io.IOUtils;
 import org.apache.uima.UIMAException;
 import org.apache.uima.cas.FSIndex;
 import org.apache.uima.cas.FSIterator;
@@ -198,8 +200,14 @@ public class TokenBuilderTest extends Co
 
   @Test
   public void testNewlinesFromFile() throws Exception {
-    String text = FileUtil.loadTextFile(new File("src/test/resources/data/docs/unix-newlines.txt.bin"),
-            "UTF-8");
+    File unixNewlines = new File("src/test/resources/data/docs/unix-newlines.txt.bin");
+    assertEquals(55, unixNewlines.length());
+    byte[] unixNewlinesBytes = IOUtils.toByteArray(new FileInputStream(unixNewlines));
+    assertEquals('.', unixNewlinesBytes[13]);
+    assertEquals(0x0A, unixNewlinesBytes[14]);
+    assertEquals('s', unixNewlinesBytes[15]);
+    
+    String text = FileUtil.loadTextFile(unixNewlines, "UTF-8");
     text = text.substring(1); // remove "\uFEFF" character from beginning of text
     tokenBuilder.buildTokens(jCas, text);
 
@@ -212,8 +220,14 @@ public class TokenBuilderTest extends Co
     assertEquals("sentence 4.", iterator.next().getCoveredText());
 
     jCas.reset();
-    text = FileUtil.loadTextFile(new File("src/test/resources/data/docs/windows-newlines.txt.bin"),
-            "UTF-8");
+    File windowsNewlines = new File("src/test/resources/data/docs/windows-newlines.txt.bin");
+    text = FileUtil.loadTextFile(windowsNewlines, "UTF-8");
+    assertEquals(65, windowsNewlines.length());
+    byte[] windowsNewlinesBytes = IOUtils.toByteArray(new FileInputStream(windowsNewlines));
+    assertEquals('.', windowsNewlinesBytes[13]);
+    assertEquals(0x0D, windowsNewlinesBytes[14]);
+    assertEquals(0x0A, windowsNewlinesBytes[15]);
+    assertEquals('s', windowsNewlinesBytes[16]);
     text = text.substring(1); // remove "\uFEFF" character from beginning of text
     tokenBuilder.buildTokens(jCas, text);