You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by tm...@apache.org on 2013/05/15 15:06:43 UTC
svn commit: r1482802 - in /ctakes/sandbox/ctakes-spelling-corrector: ./
src/org/apache/ctakes/spelling/mistakes/
src/org/apache/ctakes/spelling/priors/unigram/
Author: tmill
Date: Wed May 15 13:06:42 2013
New Revision: 1482802
URL: http://svn.apache.org/r1482802
Log:
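Small changes for printing out status info (elapsed run time in GenerateContextTriples and GenerateTermNeighborhoods; per-document IDs in UnigramPriorGenerator, which now also skips documents that fail to load), version update in pom. Also reads trie counts as Float instead of Integer in GenerateTermNeighborhoods, and changes the AggregateAE.xml descriptor path in UnigramPriorGenerator from a relative path to an absolute local path.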
Modified:
ctakes/sandbox/ctakes-spelling-corrector/pom.xml
ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateContextTriples.java
ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateTermNeighborhoods.java
ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/priors/unigram/UnigramPriorGenerator.java
Modified: ctakes/sandbox/ctakes-spelling-corrector/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-spelling-corrector/pom.xml?rev=1482802&r1=1482801&r2=1482802&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-spelling-corrector/pom.xml (original)
+++ ctakes/sandbox/ctakes-spelling-corrector/pom.xml Wed May 15 13:06:42 2013
@@ -19,7 +19,7 @@
<parent>
<groupId>org.apache.ctakes</groupId>
<artifactId>ctakes</artifactId>
- <version>3.1.0-incubating-SNAPSHOT</version>
+ <version>3.1.0-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
Modified: ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateContextTriples.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateContextTriples.java?rev=1482802&r1=1482801&r2=1482802&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateContextTriples.java (original)
+++ ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateContextTriples.java Wed May 15 13:06:42 2013
@@ -51,7 +51,7 @@ public class GenerateContextTriples {
System.err.println("Required arguments: <neighborhood file> <input files> <output dir>");
System.exit(-1);
}
-
+ long start = System.currentTimeMillis();
HashMap<String,String[]> dict = new HashMap<String,String[]>();
// HashMap<String,ClusterNode> dict = new HashMap<String,ClusterNode>();
HashMap<String,CounterMap<String>> contexts = new HashMap<String,CounterMap<String>>();
@@ -160,6 +160,7 @@ public class GenerateContextTriples {
System.exit(-1);
}
}
+ System.out.printf("Completed in %f seconds\n", (System.currentTimeMillis()-start) / 1000.0);
}
class ClusterNode{
Modified: ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateTermNeighborhoods.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateTermNeighborhoods.java?rev=1482802&r1=1482801&r2=1482802&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateTermNeighborhoods.java (original)
+++ ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/mistakes/GenerateTermNeighborhoods.java Wed May 15 13:06:42 2013
@@ -39,6 +39,7 @@ public class GenerateTermNeighborhoods {
System.exit(-1);
}
+ long start = System.currentTimeMillis();
JaspellTernarySearchTrie trie=null;
try {
trie = new JaspellTernarySearchTrie(new File(args[0]));
@@ -71,7 +72,7 @@ public class GenerateTermNeighborhoods {
}else{
maxDiff = 3;
}
- int count = (Integer) trie.get(word);
+ float count = (Float) trie.get(word);
HashSet<String> neighbors = new HashSet<String>();
for(int diff = 0; diff <= maxDiff; diff++){
trie.setMatchAlmostDiff(diff);
@@ -80,7 +81,7 @@ public class GenerateTermNeighborhoods {
HashSet<String> toRemove = new HashSet<String>();
for(String neighbor : neighbors){
- int nCount = (Integer) trie.get(neighbor);
+ float nCount = (Float) trie.get(neighbor);
if(count / nCount < 10){
toRemove.add(neighbor);
}
@@ -97,6 +98,7 @@ public class GenerateTermNeighborhoods {
scanner.nextLine(); // go to next line
}
+ System.out.printf("Completed after %f seconds\n", (System.currentTimeMillis()-start) / 1000.0);
}
}
Modified: ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/priors/unigram/UnigramPriorGenerator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/priors/unigram/UnigramPriorGenerator.java?rev=1482802&r1=1482801&r2=1482802&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/priors/unigram/UnigramPriorGenerator.java (original)
+++ ctakes/sandbox/ctakes-spelling-corrector/src/org/apache/ctakes/spelling/priors/unigram/UnigramPriorGenerator.java Wed May 15 13:06:42 2013
@@ -24,6 +24,7 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.uima.UIMAException;
@@ -54,7 +55,7 @@ public class UnigramPriorGenerator {
}
// JCas jcas = null;
- AnalysisEngine ae = AnalysisEngineFactory.createAnalysisEngineFromPath("../ctakes-core/desc/analysis_engine/AggregateAE.xml");
+ AnalysisEngine ae = AnalysisEngineFactory.createAnalysisEngineFromPath("/home/tmill/Projects/apache-ctakes/ctakes/ctakes-core/desc/analysis_engine/AggregateAE.xml");
CollectionReader reader = CollectionReaderFactory.createCollectionReader(FilesInDirectoryCollectionReader.class
, FilesInDirectoryCollectionReader.PARAM_INPUTDIR
, args[1]
@@ -67,8 +68,17 @@ public class UnigramPriorGenerator {
// iterate over directories passed in at command line
int numTokens = 0;
JCasIterable casIter = new JCasIterable(reader, ae);
+ String docId="START";
+ JCas jcas = null;
while(casIter.hasNext()){
- JCas jcas = casIter.next();
+ try{
+ jcas = casIter.next();
+ docId = DocumentIDAnnotationUtil.getDocumentID(jcas);
+ }catch(Exception e){
+ System.err.println("Error with " + docId);
+ continue;
+ }
+ System.out.println(docId);
FSIterator<Annotation> iter = jcas.getAnnotationIndex(BaseToken.type).iterator();
while(iter.hasNext()){
BaseToken tok = (BaseToken) iter.next();