You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2015/08/19 17:53:35 UTC

svn commit: r1696618 - in /ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes: consumers/SentencePrinter.java index/IndexSentences.java

Author: dligach
Date: Wed Aug 19 15:53:35 2015
New Revision: 1696618

URL: http://svn.apache.org/r1696618
Log:
Remove line breaks from sentence text before printing and indexing

Modified:
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/IndexSentences.java

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java?rev=1696618&r1=1696617&r2=1696618&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/SentencePrinter.java Wed Aug 19 15:53:35 2015
@@ -68,7 +68,9 @@ public class SentencePrinter {
     public void process(JCas jCas) throws AnalysisEngineProcessException {
       
       for(Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
-        System.out.println("* " + sentence.getCoveredText());
+        String withLineBreaks = sentence.getCoveredText();
+        String noLineBreaks = withLineBreaks.replace("\n", "");
+        System.out.println(" * " + noLineBreaks);
         System.out.println();
       }
     }

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/IndexSentences.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/IndexSentences.java?rev=1696618&r1=1696617&r2=1696618&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/IndexSentences.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/IndexSentences.java Wed Aug 19 15:53:35 2015
@@ -106,7 +106,9 @@ public class IndexSentences {
       
       for(Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
         Document document = new Document();
-        document.add(new Field("content", sentence.getCoveredText(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
+        String withLineBreaks = sentence.getCoveredText();
+        String noLineBreaks = withLineBreaks.replace("\n", "");
+        document.add(new Field("content", noLineBreaks, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
         try {
           indexWriter.addDocument(document);
         } catch (IOException e) {