You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/05/15 15:06:43 UTC

svn commit: r1482802 - in /ctakes/sandbox/ctakes-spelling-corrector: ./ src/org/apache/ctakes/spelling/mistakes/ src/org/apache/ctakes/spelling/priors/unigram/

Author: tmill
Date: Wed May 15 13:06:42 2013
New Revision: 1482802

URL: http://svn.apache.org/r1482802
Log:
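Small changes for printing out status info (elapsed run time, document IDs as they are processed); parent version in pom updated from 3.1.0-incubating-SNAPSHOT to 3.1.0-SNAPSHOT.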

Modified:
    ctakes/sandbox/ctakes-spelling-corrector/pom.xml
    ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateContextTriples.java
    ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateTermNeighborhoods.java
    ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/priors/unigram/UnigramPriorGenerator.java

Modified: ctakes/sandbox/ctakes-spelling-corrector/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-spelling-corrector/pom.xml?rev=1482802&r1=1482801&r2=1482802&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-spelling-corrector/pom.xml (original)
+++ ctakes/sandbox/ctakes-spelling-corrector/pom.xml Wed May 15 13:06:42 2013
@@ -19,7 +19,7 @@
   <parent>
   	<groupId>org.apache.ctakes</groupId>
   	<artifactId>ctakes</artifactId>
-  	<version>3.1.0-incubating-SNAPSHOT</version>
+  	<version>3.1.0-SNAPSHOT</version>
   </parent>
   <dependencies>
   	<dependency>

Modified: ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateContextTriples.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateContextTriples.java?rev=1482802&r1=1482801&r2=1482802&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateContextTriples.java (original)
+++ ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateContextTriples.java Wed May 15 13:06:42 2013
@@ -51,7 +51,7 @@ public class GenerateContextTriples {
 			System.err.println("Required arguments: <neighborhood file> <input files> <output dir>");
 			System.exit(-1);
 		}
-		
+		long start = System.currentTimeMillis();
 		HashMap<String,String[]> dict = new HashMap<String,String[]>();
 //		HashMap<String,ClusterNode> dict = new HashMap<String,ClusterNode>();
 		HashMap<String,CounterMap<String>> contexts = new HashMap<String,CounterMap<String>>();
@@ -160,6 +160,7 @@ public class GenerateContextTriples {
 				System.exit(-1);
 			}
 		}
+		System.out.printf("Completed in %f seconds\n", (System.currentTimeMillis()-start) / 1000.0);
 	}
 	
 	class ClusterNode{

Modified: ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateTermNeighborhoods.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateTermNeighborhoods.java?rev=1482802&r1=1482801&r2=1482802&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateTermNeighborhoods.java (original)
+++ ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateTermNeighborhoods.java Wed May 15 13:06:42 2013
@@ -39,6 +39,7 @@ public class GenerateTermNeighborhoods {
 			System.exit(-1);
 		}
 		
+		long start = System.currentTimeMillis();
 		JaspellTernarySearchTrie trie=null;
 		try {
 			trie = new JaspellTernarySearchTrie(new File(args[0]));
@@ -71,7 +72,7 @@ public class GenerateTermNeighborhoods {
 			}else{
 				maxDiff = 3;
 			}
-			int count = (Integer) trie.get(word);
+			float count = (Float) trie.get(word);
 			HashSet<String> neighbors = new HashSet<String>();
 			for(int diff = 0; diff <= maxDiff; diff++){
 				trie.setMatchAlmostDiff(diff);
@@ -80,7 +81,7 @@ public class GenerateTermNeighborhoods {
 			
 			HashSet<String> toRemove = new HashSet<String>();
 			for(String neighbor : neighbors){
-				int nCount = (Integer) trie.get(neighbor);
+				float nCount = (Float) trie.get(neighbor);
 				if(count / nCount < 10){
 					toRemove.add(neighbor);
 				}
@@ -97,6 +98,7 @@ public class GenerateTermNeighborhoods {
 			
 			scanner.nextLine(); // go to next line
 		}
+		System.out.printf("Completed after %f seconds\n", (System.currentTimeMillis()-start) / 1000.0);
 	}
 
 }

Modified: ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/priors/unigram/UnigramPriorGenerator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/priors/unigram/UnigramPriorGenerator.java?rev=1482802&r1=1482801&r2=1482802&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/priors/unigram/UnigramPriorGenerator.java (original)
+++ ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/priors/unigram/UnigramPriorGenerator.java Wed May 15 13:06:42 2013
@@ -24,6 +24,7 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.syntax.WordToken;
 import org.apache.uima.UIMAException;
@@ -54,7 +55,7 @@ public class UnigramPriorGenerator {
 		}
 		
 //		JCas jcas = null;
-		AnalysisEngine ae = AnalysisEngineFactory.createAnalysisEngineFromPath("../ctakes-core/desc/analysis_engine/AggregateAE.xml");
+		AnalysisEngine ae = AnalysisEngineFactory.createAnalysisEngineFromPath("/home/tmill/Projects/apache-ctakes/ctakes/ctakes-core/desc/analysis_engine/AggregateAE.xml");
 		CollectionReader reader = CollectionReaderFactory.createCollectionReader(FilesInDirectoryCollectionReader.class
 				, FilesInDirectoryCollectionReader.PARAM_INPUTDIR
 				, args[1]
@@ -67,8 +68,17 @@ public class UnigramPriorGenerator {
 		// iterate over directories passed in at command line
 		int numTokens = 0;
 		JCasIterable casIter = new JCasIterable(reader, ae);
+		String docId="START";
+		JCas jcas = null;
 		while(casIter.hasNext()){
-			JCas jcas = casIter.next();
+		  try{
+		    jcas = casIter.next();
+	      docId = DocumentIDAnnotationUtil.getDocumentID(jcas);
+		  }catch(Exception e){
+		    System.err.println("Error with " + docId);
+		    continue;
+		  }
+			System.out.println(docId);
 			FSIterator<Annotation> iter = jcas.getAnnotationIndex(BaseToken.type).iterator();
 			while(iter.hasNext()){
 				BaseToken tok = (BaseToken) iter.next();