You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2015/09/04 19:12:18 UTC
svn commit: r1701295 -
/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java
Author: dligach
Date: Fri Sep 4 17:12:17 2015
New Revision: 1701295
URL: http://svn.apache.org/r1701295
Log:
printing local context instead of entire sentence
Modified:
ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java
Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java?rev=1701295&r1=1701294&r2=1701295&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java Fri Sep 4 17:12:17 2015
@@ -41,8 +41,8 @@ public class SearchUtility {
for(ScoreDoc scoreDoc : scoreDocs) {
Document document = indexSearcher.doc(scoreDoc.doc);
String text = document.get(fieldName);
- System.out.println(text);
- System.out.println();
+ String context = getContext(queryText, text, 30);
+ System.out.println(context);
}
directory.close();
@@ -50,5 +50,29 @@ public class SearchUtility {
System.out.println("total hits: " + scoreDocs.length);
}
+
+ /**
+ * Get context for a string. Return "" if string not found in text.
+ *
+ * TODO: Occasionally no context is found when the indexer removed certain
+ * characters which still exist in the source text. E.g. when "... pain, and swelling"
+ * is in the source document, the query "pain and swelling" will return this document.
+ * However, this method will not find the occurence of "pain and swelling" in the
+ * document because of the comma.
+ */
+ public static String getContext(String string, String text, int characterWindow) {
+
+ String noEOL = text.replace('\n', ' ');
+ int begin = noEOL.indexOf(string);
+ if(begin == -1) {
+ return "";
+ }
+
+ int end = begin + string.length();
+ int contextBegin = Math.max(0, begin - characterWindow);
+ int contextEnd = Math.min(text.length(), end + characterWindow);
+
+ return noEOL.substring(contextBegin, contextEnd);
+ }
}