You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2014/02/06 23:16:15 UTC
svn commit: r1565462 -
/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/SemanticClasses.java
Author: tmill
Date: Thu Feb 6 22:16:15 2014
New Revision: 1565462
URL: http://svn.apache.org/r1565462
Log:
CTAKES-94 and CTAKES-210: Fix semantic classes to use InputStream and to read a file instead of a directory.
Modified:
ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/SemanticClasses.java
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/SemanticClasses.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/SemanticClasses.java?rev=1565462&r1=1565461&r2=1565462&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/SemanticClasses.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/SemanticClasses.java Thu Feb 6 22:16:15 2014
@@ -1,45 +1,34 @@
package org.apache.ctakes.assertion.util;
-import java.io.File;
-import java.io.FileNotFoundException;
+import java.io.InputStream;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Scanner;
-import org.apache.uima.resource.ResourceInitializationException;
-
public class SemanticClasses extends HashMap<String,HashSet<String>>{
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- // loads files in the input directory into a hashmap that maps the filename minus the extension ("allergy.txt" becomes "allergy")
- // to the set of words in that file ("allergy" => ("allergic", "allergies", "allergy", ...)
- public SemanticClasses(String semClassDir) throws ResourceInitializationException{
- File classDir = new File(semClassDir);
- if(classDir.exists() && classDir.isDirectory()){
- File[] classFiles = classDir.listFiles();
- for(File semClass : classFiles){
- if(semClass.isDirectory() || semClass.isHidden()) continue;
- HashSet<String> classWords = new HashSet<String>();
- Scanner scanner = null;
- try {
- scanner = new Scanner(semClass);
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- throw new ResourceInitializationException("Error: Could not open file:", new Object[]{ semClass}, e);
- }
- while(scanner.hasNextLine()){
- String term = scanner.nextLine().trim();
- // if the term on this line is a multi-word expression, ignore, because we can't
- // place these in the tree anyways
- if(!term.contains(" ")){
- classWords.add(term);
- }
- }
- put(semClass.getName().replace(".txt", ""), classWords);
- }
- }
- }
+ /**
+ *
+ */
+ private static final long serialVersionUID = 1L;
+
+ // reads tab-separated "key<TAB>value" lines from the input stream into this hashmap, mapping each key (e.g. "allergy")
+ // to the set of values listed for it ("allergy" => ("allergic", "allergies", "allergy", ...)); lines without exactly two fields are skipped
+ public SemanticClasses(InputStream inStream){
+ Scanner scanner = new Scanner(inStream);
+ while(scanner.hasNextLine()){
+ String term = scanner.nextLine().trim();
+ // if the value on this line is a multi-word expression (contains a space), it is not added (see check below), because we can't
+ // place these in the tree anyways; note the key is still registered first, so it may end up mapped to an empty set
+
+ String[] keyVal = term.split("\t");
+ if(keyVal.length != 2) continue;
+ if(!this.containsKey(keyVal[0])){
+ this.put(keyVal[0], new HashSet<String>());
+ }
+
+ if(!keyVal[1].contains(" ")){
+ this.get(keyVal[0]).add(keyVal[1]);
+ }
+ }
+ }
}