You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by dl...@apache.org on 2015/08/18 19:38:42 UTC
svn commit: r1696466 -
/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java
Author: dligach
Date: Tue Aug 18 17:38:42 2015
New Revision: 1696466
URL: http://svn.apache.org/r1696466
Log:
printing entire sentences now instead of a few words on the sides of the match
Modified:
ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java
Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java?rev=1696466&r1=1696465&r2=1696466&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/index/SearchUtility.java Tue Aug 18 17:38:42 2015
@@ -18,9 +18,9 @@ public class SearchUtility {
public static void main(String[] args) throws IOException {
- final int maxHits = 250;
+ final int maxHits = 100;
final String searchField = "content";
- final String indexLocation = "/Users/Dima/Boston/Data/Mimic/Index/";
+ final String indexLocation = "/Users/dima/Boston/Data/DeepPhe/Index/";
String queryText = JOptionPane.showInputDialog("Enter query");
@@ -39,36 +39,11 @@ public class SearchUtility {
for(ScoreDoc scoreDoc : scoreDocs) {
Document document = indexSearcher.doc(scoreDoc.doc);
String text = document.get(searchField).toLowerCase().replace('\n', ' ');
- String context = getContext(queryText, text, 20);
- System.out.println(context);
+ System.out.println(text);
}
// indexSearcher.close();
System.out.println("total hits: " + scoreDocs.length);
}
-
- /**
- * Get context for a string. Return "" if string not found in text.
- *
- * TODO: Occasionally no context is found when the indexer removed certain
- * characters which still exist in the source text. E.g. when "... pain, and swelling"
- * is in the source document, the query "pain and swelling" will return this document.
- * However, this method will not find the occurence of "pain and swelling" in the
- * document because of the comma.
- */
- public static String getContext(String string, String text, int characterWindow) {
-
- String noEOL = text.replace('\n', ' ');
- int begin = noEOL.indexOf(string);
- if(begin == -1) {
- return "";
- }
-
- int end = begin + string.length();
- int contextBegin = Math.max(0, begin - characterWindow);
- int contextEnd = Math.min(text.length(), end + characterWindow);
-
- return noEOL.substring(contextBegin, contextEnd);
- }
}