You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/02/06 23:16:15 UTC

svn commit: r1565462 - /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/SemanticClasses.java

Author: tmill
Date: Thu Feb  6 22:16:15 2014
New Revision: 1565462

URL: http://svn.apache.org/r1565462
Log:
CTAKES-94 and CTAKES-210: Fix semantic classes to use InputStream and to read a file instead of a directory.

Modified:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/SemanticClasses.java

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/SemanticClasses.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/SemanticClasses.java?rev=1565462&r1=1565461&r2=1565462&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/SemanticClasses.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/SemanticClasses.java Thu Feb  6 22:16:15 2014
@@ -1,45 +1,34 @@
 package org.apache.ctakes.assertion.util;
 
-import java.io.File;
-import java.io.FileNotFoundException;
+import java.io.InputStream;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Scanner;
 
-import org.apache.uima.resource.ResourceInitializationException;
-
 public class SemanticClasses extends HashMap<String,HashSet<String>>{
-	/**
-	 * 
-	 */
-	private static final long serialVersionUID = 1L;
-
-	// loads files in the input directory into a hashmap that maps the filename minus the extension ("allergy.txt" becomes "allergy")
-	// to the set of words in that file ("allergy" => ("allergic", "allergies", "allergy", ...)
-	public SemanticClasses(String semClassDir) throws ResourceInitializationException{
-		File classDir = new File(semClassDir);
-		if(classDir.exists() && classDir.isDirectory()){
-			File[] classFiles = classDir.listFiles();
-			for(File semClass : classFiles){
-				if(semClass.isDirectory() || semClass.isHidden()) continue;
-				HashSet<String> classWords = new HashSet<String>();
-				Scanner scanner = null;
-				try {
-					scanner = new Scanner(semClass);
-				} catch (FileNotFoundException e) {
-					e.printStackTrace();
-					throw new ResourceInitializationException("Error: Could not open file:", new Object[]{ semClass}, e);
-				}
-				while(scanner.hasNextLine()){
-					String term = scanner.nextLine().trim();
-					// if the term on this line is a multi-word expression, ignore, because we can't
-					// place these in the tree anyways
-					if(!term.contains(" ")){
-						classWords.add(term);
-					}
-				}
-				put(semClass.getName().replace(".txt", ""), classWords);
-			}
-		}
-	}
+  /**
+   * 
+   */
+  private static final long serialVersionUID = 1L;
+  
+  // loads a stream of tab-separated lines ("<class name>\t<word>", e.g. "allergy\tallergic") into a hashmap that maps each class name
+  // to the set of single-word terms listed for that class ("allergy" => ("allergic", "allergies", "allergy", ...))
+  public SemanticClasses(InputStream inStream){
+    Scanner scanner = new Scanner(inStream); // NOTE(review): scanner is never closed here; closing it would also close inStream -- confirm who owns the stream
+    while(scanner.hasNextLine()){
+      String term = scanner.nextLine().trim();
+      // each usable line is "<class name>\t<word>"; a line that does not split into
+      // exactly two tab-separated fields is skipped
+      
+      String[] keyVal = term.split("\t");
+      if(keyVal.length != 2) continue;
+      if(!this.containsKey(keyVal[0])){ // first time this class name is seen: register it with an empty word set
+        this.put(keyVal[0], new HashSet<String>());
+      }
+      // multi-word expressions are ignored because they can't be placed in the tree; the class key above is still kept, possibly with an empty set
+      if(!keyVal[1].contains(" ")){
+        this.get(keyVal[0]).add(keyVal[1]);
+      }
+    }
+  }
 }