You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ja...@apache.org on 2017/03/29 21:49:16 UTC
svn commit: r1789417 - in /ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core: ae/TokenizerAnnotatorPTB.java nlp/tokenizer/TokenizerPTB.java
Author: james-masanz
Date: Wed Mar 29 21:49:16 2017
New Revision: 1789417
URL: http://svn.apache.org/viewvc?rev=1789417&view=rev
Log:
Update the URL of the Supplementary Guidelines for ETTB 2.0 (Addendum 2009).
Modified:
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotatorPTB.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/nlp/tokenizer/TokenizerPTB.java
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotatorPTB.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotatorPTB.java?rev=1789417&r1=1789416&r2=1789417&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotatorPTB.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/TokenizerAnnotatorPTB.java Wed Mar 29 21:49:16 2017
@@ -217,6 +217,7 @@ public class TokenizerAnnotatorPTB exten
// From http://www.seas.upenn.edu/~jmott/2009_addendum.pdf
+// which was moved to https://www.ldc.upenn.edu/sites/www.ldc.upenn.edu/files/etb-supplementary-guidelines-2009-addendum.pdf
// modified by Task1.4.4_adoptedConventions[AL]_Feb28_2011.doc
//All strings separated by white space are treated as separate tokens.
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/nlp/tokenizer/TokenizerPTB.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/nlp/tokenizer/TokenizerPTB.java?rev=1789417&r1=1789416&r2=1789417&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/nlp/tokenizer/TokenizerPTB.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/nlp/tokenizer/TokenizerPTB.java Wed Mar 29 21:49:16 2017
@@ -48,7 +48,8 @@ import org.apache.ctakes.typesystem.type
/**
* A class used to break natural text into tokens following PTB rules.
* See Supplementary Guidelines for ETTB 2.0
- * dated April 6th, 2009.
+ * dated April 6th, 2009.
+ * https://www.ldc.upenn.edu/sites/www.ldc.upenn.edu/files/etb-supplementary-guidelines-2009-addendum.pdf
* The token markup is external to the text and is not embedded.
* Character offset location is used to identify the boundaries of a token.
*