You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by jo...@apache.org on 2011/08/01 17:25:50 UTC
svn commit: r1152824 [20/21] - in
/uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker:
./ action/ batch/ condition/ engine/ kernel/ kernel/constraint/
kernel/expression/ kernel/expression/bool...
Added: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/SeedLexer.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/SeedLexer.java?rev=1152824&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/SeedLexer.java (added)
+++ uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/SeedLexer.java Mon Aug 1 15:24:44 2011
@@ -0,0 +1,884 @@
+/* The following code was generated by JFlex 1.4.3 on 19.04.10 17:07 */
+
+package org.apache.uima.tm.textmarker.seed;
+import java.util.*;
+import java.util.regex.*;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.tm.textmarker.kernel.type.TextMarkerBasic;
+import org.apache.uima.tm.type.AMP;
+import org.apache.uima.tm.type.BREAK;
+import org.apache.uima.tm.type.CAP;
+import org.apache.uima.tm.type.COLON;
+import org.apache.uima.tm.type.COMMA;
+import org.apache.uima.tm.type.CW;
+import org.apache.uima.tm.type.EXCLAMATION;
+import org.apache.uima.tm.type.MARKUP;
+import org.apache.uima.tm.type.NBSP;
+import org.apache.uima.tm.type.NUM;
+import org.apache.uima.tm.type.PERIOD;
+import org.apache.uima.tm.type.QUESTION;
+import org.apache.uima.tm.type.SEMICOLON;
+import org.apache.uima.tm.type.SPACE;
+import org.apache.uima.tm.type.SPECIAL;
+import org.apache.uima.tm.type.SW;
+
+
+
+/**
+ * This class is a scanner generated by
+ * <a href="http://www.jflex.de/">JFlex</a> 1.4.3
+ * on 19.04.10 17:07 from the specification file
+ * <tt>D:/work/workspace-tm/org.apache.uima.tm.textmarker.engine/src/de/uniwue/tm/textmarker/scanner/SeedLexer.flex</tt>
+ */
+class SeedLexer {
+
+ /** This character denotes the end of file */
+ public static final int YYEOF = -1;
+
+ /** initial size of the lookahead buffer */
+ private static final int ZZ_BUFFERSIZE = 16384;
+
+ /** lexical states */
+ public static final int YYINITIAL = 0;
+
+ /**
+ * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+ * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+ * at the beginning of a line
+ * l is of the form l = 2*k, k a non negative integer
+ */
+ private static final int ZZ_LEXSTATE[] = {
+ 0, 0
+ };
+
+ /**
+ * Translates characters to character classes
+ */
+ private static final String ZZ_CMAP_PACKED =
+ "\10\0\1\4\1\3\1\4\2\0\1\4\22\0\1\3\1\7\4\0"+
+ "\1\12\5\0\1\25\1\0\1\26\1\6\12\2\1\24\1\17\1\5"+
+ "\1\0\1\10\1\27\1\0\1\1\1\21\13\1\1\20\1\1\1\23"+
+ "\2\1\1\22\7\1\6\0\1\31\1\14\13\31\1\13\1\31\1\16"+
+ "\2\31\1\15\7\31\45\0\1\11\11\0\1\30\12\0\1\30\4\0"+
+ "\1\30\5\0\27\32\1\0\7\32\30\30\1\0\10\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\2\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\2\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\2\32\1\30\1\32\1\30"+
+ "\1\32\3\30\2\32\1\30\1\32\1\30\2\32\1\30\3\32\2\30"+
+ "\4\32\1\30\2\32\1\30\3\32\3\30\2\32\1\30\2\32\1\30"+
+ "\1\32\1\30\1\32\1\30\2\32\1\30\1\32\2\30\1\32\1\30"+
+ "\2\32\1\30\3\32\1\30\1\32\1\30\2\32\2\30\1\0\1\32"+
+ "\3\30\4\0\1\32\1\0\1\30\1\32\1\0\1\30\1\32\1\0"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\2\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\2\30\1\32\1\0\1\30\1\32\1\30"+
+ "\3\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\4\30"+
+ "\31\0\140\30\326\0\1\32\1\0\3\32\1\0\1\32\1\0\2\32"+
+ "\1\30\21\32\1\0\11\32\43\30\1\0\2\30\3\32\3\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\5\30\1\32\1\30\1\0\1\32\1\30\2\32\1\30"+
+ "\4\0\60\32\60\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\10\0\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\2\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\0\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\2\0\1\32\1\30\6\0\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\41\0\46\32\12\0\47\30\u0b18\0"+
+ "\46\32\u0c3a\0\54\30\66\0\12\30\224\0\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+
+ "\1\32\1\30\1\32\1\30\1\32\7\30\4\0\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+
+ "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\6\0\10\30\10\32"+
+ "\6\30\2\0\6\32\2\0\10\30\10\32\10\30\10\32\6\30\2\0"+
+ "\6\32\2\0\10\30\1\0\1\32\1\0\1\32\1\0\1\32\1\0"+
+ "\1\32\10\30\10\32\16\30\2\0\10\30\10\0\10\30\10\0\10\30"+
+ "\10\0\5\30\1\0\2\30\4\32\2\0\1\30\3\0\3\30\1\0"+
+ "\2\30\4\32\4\0\4\30\2\0\2\30\4\32\4\0\10\30\5\32"+
+ "\5\0\3\30\1\0\2\30\4\32\165\0\1\30\15\0\1\30\202\0"+
+ "\1\32\4\0\1\32\2\0\1\30\3\32\2\30\3\32\1\30\1\0"+
+ "\1\32\3\0\5\32\6\0\1\32\1\0\1\32\1\0\1\32\1\0"+
+ "\4\32\1\0\1\30\2\32\1\0\1\32\1\30\4\0\1\30\3\0"+
+ "\1\30\2\32\5\0\1\32\4\30\ud9b6\0\7\30\14\0\5\30\u0409\0"+
+ "\32\32\6\0\32\30\245\0";
+
+ /**
+ * Translates characters to character classes
+ */
+ private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
+
+ /**
+ * Translates DFA states to action switch labels.
+ */
+ private static final int [] ZZ_ACTION = zzUnpackAction();
+
+ private static final String ZZ_ACTION_PACKED_0 =
+ "\1\0\1\1\1\2\1\3\1\4\1\5\1\1\1\6"+
+ "\1\7\1\1\1\10\1\11\1\12\1\13\1\14\1\15"+
+ "\1\16\1\2\5\0\1\17\1\0\1\20\2\0\1\21"+
+ "\3\0";
+
+ private static int [] zzUnpackAction() {
+ int [] result = new int[32];
+ int offset = 0;
+ offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int zzUnpackAction(String packed, int offset, int [] result) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int count = packed.charAt(i++);
+ int value = packed.charAt(i++);
+ do result[j++] = value; while (--count > 0);
+ }
+ return j;
+ }
+
+
+ /**
+ * Translates a state to a row index in the transition table
+ */
+ private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
+
+ private static final String ZZ_ROWMAP_PACKED_0 =
+ "\0\0\0\33\0\66\0\121\0\33\0\33\0\154\0\33"+
+ "\0\33\0\207\0\242\0\33\0\33\0\33\0\33\0\33"+
+ "\0\275\0\330\0\363\0\u010e\0\u0129\0\u0144\0\u015f\0\33"+
+ "\0\u017a\0\33\0\u0195\0\u01b0\0\33\0\u01cb\0\u01e6\0\u0201";
+
+ private static int [] zzUnpackRowMap() {
+ int [] result = new int[32];
+ int offset = 0;
+ offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int zzUnpackRowMap(String packed, int offset, int [] result) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int high = packed.charAt(i++) << 16;
+ result[j++] = high | packed.charAt(i++);
+ }
+ return j;
+ }
+
+ /**
+ * The transition table of the DFA
+ */
+ private static final int [] ZZ_TRANS = zzUnpackTrans();
+
+ private static final String ZZ_TRANS_PACKED_0 =
+ "\1\2\1\3\1\4\1\5\1\6\1\7\1\2\1\10"+
+ "\1\2\1\11\1\12\4\13\1\14\4\3\1\15\1\16"+
+ "\1\17\1\20\2\13\1\3\34\0\1\21\11\0\4\22"+
+ "\1\0\4\21\4\0\2\22\1\21\2\0\1\4\31\0"+
+ "\1\23\4\0\1\24\1\23\3\0\4\23\1\0\4\23"+
+ "\5\0\1\23\2\0\1\25\11\0\1\26\3\25\1\0"+
+ "\1\27\3\25\5\0\1\25\14\0\4\13\11\0\2\13"+
+ "\2\0\1\21\16\0\4\21\6\0\1\21\13\0\4\22"+
+ "\11\0\2\22\1\0\10\23\1\30\22\23\1\0\1\31"+
+ "\5\0\1\31\3\0\4\31\1\0\4\31\5\0\1\31"+
+ "\2\0\1\25\11\0\4\25\1\32\4\25\5\0\1\25"+
+ "\2\0\1\25\11\0\1\25\1\33\2\25\1\32\4\25"+
+ "\5\0\1\25\2\0\1\25\11\0\4\25\1\32\1\25"+
+ "\1\34\2\25\5\0\1\25\1\0\10\31\1\35\22\31"+
+ "\1\0\1\25\11\0\2\25\1\36\1\25\1\32\4\25"+
+ "\5\0\1\25\2\0\1\25\11\0\4\25\1\32\2\25"+
+ "\1\37\1\25\5\0\1\25\2\0\1\25\11\0\3\25"+
+ "\1\40\1\32\4\25\5\0\1\25\2\0\1\25\11\0"+
+ "\4\25\1\32\3\25\1\40\5\0\1\25\2\0\1\25"+
+ "\11\0\4\25\1\11\4\25\5\0\1\25\1\0";
+
+ private static int [] zzUnpackTrans() {
+ int [] result = new int[540];
+ int offset = 0;
+ offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int zzUnpackTrans(String packed, int offset, int [] result) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int count = packed.charAt(i++);
+ int value = packed.charAt(i++);
+ value--;
+ do result[j++] = value; while (--count > 0);
+ }
+ return j;
+ }
+
+
+ /* error codes */
+ private static final int ZZ_UNKNOWN_ERROR = 0;
+ private static final int ZZ_NO_MATCH = 1;
+ private static final int ZZ_PUSHBACK_2BIG = 2;
+
+ /* error messages for the codes above */
+ private static final String ZZ_ERROR_MSG[] = {
+ "Unkown internal scanner error",
+ "Error: could not match input",
+ "Error: pushback value was too large"
+ };
+
+ /**
+ * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
+ */
+ private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
+
+ private static final String ZZ_ATTRIBUTE_PACKED_0 =
+ "\1\0\1\11\2\1\2\11\1\1\2\11\2\1\5\11"+
+ "\2\1\5\0\1\11\1\0\1\11\2\0\1\11\3\0";
+
+ private static int [] zzUnpackAttribute() {
+ int [] result = new int[32];
+ int offset = 0;
+ offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int zzUnpackAttribute(String packed, int offset, int [] result) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int count = packed.charAt(i++);
+ int value = packed.charAt(i++);
+ do result[j++] = value; while (--count > 0);
+ }
+ return j;
+ }
+
+ /** the input device */
+ private java.io.Reader zzReader;
+
+ /** the current state of the DFA */
+ private int zzState;
+
+ /** the current lexical state */
+ private int zzLexicalState = YYINITIAL;
+
+ /** this buffer contains the current text to be matched and is
+ the source of the yytext() string */
+ private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
+
+ /** the textposition at the last accepting state */
+ private int zzMarkedPos;
+
+ /** the current text position in the buffer */
+ private int zzCurrentPos;
+
+ /** startRead marks the beginning of the yytext() string in the buffer */
+ private int zzStartRead;
+
+ /** endRead marks the last character in the buffer, that has been read
+ from input */
+ private int zzEndRead;
+
+ /** number of newlines encountered up to the start of the matched text */
+ private int yyline;
+
+ /** the number of characters up to the start of the matched text */
+ private int yychar;
+
+ /**
+ * the number of characters from the last newline up to the start of the
+ * matched text
+ */
+ private int yycolumn;
+
+ /**
+ * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+ */
+ private boolean zzAtBOL = true;
+
+ /** zzAtEOF == true <=> the scanner is at the EOF */
+ private boolean zzAtEOF;
+
+ /** denotes if the user-EOF-code has already been executed */
+ private boolean zzEOFDone;
+
+ /* user code: */
+ private int number = 0;
+
+ private Map<String,String> tags = new HashMap<String,String>();
+ private JCas cas;
+ private final static Pattern tagPattern =
+ Pattern.compile("</?(\\w+)([^>]*)>");
+ /**
+  * Matches <code>tag</code> against <code>tagPattern</code> and, on success,
+  * stores the lower-cased tag name in the <code>tags</code> map together with
+  * the text between the name and the closing <code>&gt;</code>.
+  *
+  * @param tag the matched markup text
+  * @return the lower-cased tag name, or <code>"!"</code> if the text does not
+  *         match the tag pattern
+  */
+ private String splitAndPutInMap(String tag){
+   Matcher m = tagPattern.matcher(tag);
+   if (!m.find()) {
+     return "!";
+   }
+   // Locale.ROOT keeps the map key independent of the JVM default locale
+   // (a Turkish default locale would otherwise fold "I" to a dotless i).
+   String name = m.group(1).toLowerCase(Locale.ROOT);
+   tags.put(name, m.group(2));
+   return name;
+ }
+
+ /**
+  * Removes the entry for a closing tag from the <code>tags</code> map.
+  * The key is derived by stripping <code>&lt;/</code> and <code>&gt;</code>,
+  * lower-casing and trimming the remainder.
+  *
+  * @param closingTag the matched closing tag text
+  */
+ private void removeTag(String closingTag){
+   String cTag = closingTag.replace("</","");
+   // Must fold case exactly like splitAndPutInMap, otherwise the key is not found.
+   cTag = cTag.replace(">","").toLowerCase(Locale.ROOT);
+   tags.remove(cTag.trim());
+ }
+ /**
+ * Sets the JCas that is passed to the constructor of every annotation
+ * object created in yylex().
+ *
+ * @param cas the JCas to use for subsequently created tokens
+ */
+ public void setJCas(JCas cas) {
+ this.cas = cas;
+ }
+
+
+ /**
+ * Creates a new scanner that reads from the given reader.
+ * There is also a java.io.InputStream version of this constructor.
+ * No JCas is set here; call setJCas(JCas) before scanning.
+ *
+ * @param in the java.io.Reader to read input from.
+ */
+ SeedLexer(java.io.Reader in) {
+ this.zzReader = in;
+ }
+
+ /**
+ * Creates a new scanner that reads from the given stream.
+ * There is also a java.io.Reader version of this constructor.
+ *
+ * The stream is wrapped in a java.io.InputStreamReader without an explicit
+ * charset, so bytes are decoded with the platform default charset. Use the
+ * Reader constructor when a specific encoding is required.
+ *
+ * @param in the java.io.InputStream to read input from.
+ */
+ SeedLexer(java.io.InputStream in) {
+ this(new java.io.InputStreamReader(in));
+ }
+
+ /**
+ * Unpacks the compressed character translation table.
+ *
+ * @param packed the packed character translation table
+ * @return the unpacked character translation table
+ */
+ private static char [] zzUnpackCMap(String packed) {
+ char [] map = new char[0x10000];
+ int i = 0; /* index in packed string */
+ int j = 0; /* index in unpacked array */
+ while (i < 1808) {
+ int count = packed.charAt(i++);
+ char value = packed.charAt(i++);
+ do map[j++] = value; while (--count > 0);
+ }
+ return map;
+ }
+
+
+ /**
+ * Refills the input buffer.
+ *
+ * @return <code>false</code>, iff there was new input.
+ *
+ * @exception java.io.IOException if any I/O-Error occurs
+ */
+ private boolean zzRefill() throws java.io.IOException {
+
+ /* first: make room (if you can) */
+ if (zzStartRead > 0) {
+ System.arraycopy(zzBuffer, zzStartRead,
+ zzBuffer, 0,
+ zzEndRead-zzStartRead);
+
+ /* translate stored positions */
+ zzEndRead-= zzStartRead;
+ zzCurrentPos-= zzStartRead;
+ zzMarkedPos-= zzStartRead;
+ zzStartRead = 0;
+ }
+
+ /* is the buffer big enough? */
+ if (zzCurrentPos >= zzBuffer.length) {
+ /* if not: blow it up */
+ char newBuffer[] = new char[zzCurrentPos*2];
+ System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
+ zzBuffer = newBuffer;
+ }
+
+ /* finally: fill the buffer with new input */
+ int numRead = zzReader.read(zzBuffer, zzEndRead,
+ zzBuffer.length-zzEndRead);
+
+ if (numRead > 0) {
+ zzEndRead+= numRead;
+ return false;
+ }
+ // unlikely but not impossible: read 0 characters, but not at end of stream
+ if (numRead == 0) {
+ int c = zzReader.read();
+ if (c == -1) {
+ return true;
+ } else {
+ zzBuffer[zzEndRead++] = (char) c;
+ return false;
+ }
+ }
+
+ // numRead < 0
+ return true;
+ }
+
+
+ /**
+  * Marks the scanner as being at end of file, invalidates the buffered
+  * input and closes the underlying reader if there is one.
+  *
+  * @exception java.io.IOException if closing the reader fails
+  */
+ public final void yyclose() throws java.io.IOException {
+   zzAtEOF = true;
+   // an empty read window makes the remaining buffer content unreachable
+   zzEndRead = zzStartRead;
+
+   if (zzReader == null) {
+     return;
+   }
+   zzReader.close();
+ }
+
+
+ /**
+  * Resets the scanner to read from a new input stream.
+  * Does not close the old reader.
+  *
+  * All buffer positions, the line/char/column counters and the EOF flags are
+  * reset, so input still buffered from the old reader is discarded.
+  * The lexical state is set to <tt>YYINITIAL</tt>.
+  * The user-code fields (tag map, JCas) are not touched by this method.
+  *
+  * @param reader the new input stream
+  */
+ public final void yyreset(java.io.Reader reader) {
+   zzReader = reader;
+
+   // scanner flags
+   zzAtBOL = true;
+   zzAtEOF = false;
+   zzEOFDone = false;
+
+   // buffer positions
+   zzStartRead = 0;
+   zzEndRead = 0;
+   zzMarkedPos = 0;
+   zzCurrentPos = 0;
+
+   // position counters
+   yycolumn = 0;
+   yychar = 0;
+   yyline = 0;
+
+   zzLexicalState = YYINITIAL;
+ }
+
+
+ /**
+ * Returns the current lexical state.
+ *
+ * @return the state last set by yybegin(int) or yyreset(java.io.Reader);
+ * YYINITIAL if neither has been called
+ */
+ public final int yystate() {
+ return zzLexicalState;
+ }
+
+
+ /**
+ * Enters a new lexical state. The value is stored as is and is used as an
+ * index into ZZ_LEXSTATE when the next match starts; it is not validated here.
+ *
+ * @param newState the new lexical state
+ */
+ public final void yybegin(int newState) {
+ zzLexicalState = newState;
+ }
+
+
+ /**
+  * Returns the text matched by the current regular expression.
+  *
+  * @return a new String holding the buffer characters from the start of the
+  *         match up to the marked position
+  */
+ public final String yytext() {
+   final int length = zzMarkedPos - zzStartRead;
+   return new String(zzBuffer, zzStartRead, length);
+ }
+
+
+ /**
+ * Returns the character at position <tt>pos</tt> from the
+ * matched text.
+ *
+ * It is equivalent to yytext().charAt(pos), but faster because no String
+ * is created. The position is not checked against yylength(); it is only
+ * used as an offset into the internal buffer.
+ *
+ * @param pos the position of the character to fetch.
+ * A value from 0 to yylength()-1.
+ *
+ * @return the character at position pos
+ */
+ public final char yycharat(int pos) {
+ return zzBuffer[zzStartRead+pos];
+ }
+
+
+ /**
+ * Returns the length of the matched text region.
+ *
+ * @return the number of characters between the start of the match and the
+ * marked position
+ */
+ public final int yylength() {
+ return zzMarkedPos-zzStartRead;
+ }
+
+
+ /**
+ * Reports an error that occured while scanning.
+ *
+ * In a wellformed scanner (no or only correct usage of
+ * yypushback(int) and a match-all fallback rule) this method
+ * will only be called with things that "Can't Possibly Happen".
+ * If this method is called, something is seriously wrong
+ * (e.g. a JFlex bug producing a faulty scanner etc.).
+ *
+ * Usual syntax/scanner level error handling should be done
+ * in error fallback rules.
+ *
+ * @param errorCode the code of the errormessage to display
+ */
+ private void zzScanError(int errorCode) {
+ String message;
+ try {
+ message = ZZ_ERROR_MSG[errorCode];
+ }
+ catch (ArrayIndexOutOfBoundsException e) {
+ message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+ }
+
+ throw new Error(message);
+ }
+
+
+ /**
+  * Pushes the specified amount of characters back into the input stream.
+  *
+  * They will be read again by the next call of the scanning method.
+  * Note that the parameter hides the user-code field of the same name.
+  *
+  * @param number the number of characters to be read again.
+  *               This number must not be greater than yylength()!
+  */
+ public void yypushback(int number) {
+   final boolean tooMany = number > yylength();
+   if (tooMany) {
+     zzScanError(ZZ_PUSHBACK_2BIG);
+   }
+
+   zzMarkedPos = zzMarkedPos - number;
+ }
+
+
+ /**
+ * Resumes scanning until the next regular expression is matched,
+ * the end of input is encountered or an I/O-Error occurs.
+ *
+ * Each match is turned into an annotation object of the type selected by
+ * the action table (AMP, QUESTION, PERIOD, NBSP, COLON, COMMA, SPACE,
+ * SEMICOLON, SW, SPECIAL, CAP, MARKUP, EXCLAMATION, NUM, CW or BREAK).
+ * The object is created with the JCas given to setJCas(JCas); its begin is
+ * set to yychar, its end to yychar plus the length of the matched text, and
+ * it receives the current tag map. For the two MARKUP actions the tag map is
+ * updated first (removeTag for action 17, splitAndPutInMap for action 15).
+ *
+ * If no rule matches, zzScanError(ZZ_NO_MATCH) is called, which throws a
+ * java.lang.Error.
+ *
+ * @return the next token, or <code>null</code> when the end of input is reached
+ * @exception java.io.IOException if any I/O-Error occurs
+ */
+ public TextMarkerBasic yylex() throws java.io.IOException {
+ int zzInput;
+ int zzAction;
+
+ // cached fields:
+ int zzCurrentPosL;
+ int zzMarkedPosL;
+ int zzEndReadL = zzEndRead;
+ char [] zzBufferL = zzBuffer;
+ char [] zzCMapL = ZZ_CMAP;
+
+ int [] zzTransL = ZZ_TRANS;
+ int [] zzRowMapL = ZZ_ROWMAP;
+ int [] zzAttrL = ZZ_ATTRIBUTE;
+
+ // loops until an action returns a token, end of input is reached or an error is thrown
+ while (true) {
+ zzMarkedPosL = zzMarkedPos;
+
+ // advance the character offset past the previously matched text
+ yychar+= zzMarkedPosL-zzStartRead;
+
+ // count the line terminators contained in the previous match;
+ // zzR remembers a trailing '\r' so that "\r\n" is counted only once
+ boolean zzR = false;
+ for (zzCurrentPosL = zzStartRead; zzCurrentPosL < zzMarkedPosL;
+ zzCurrentPosL++) {
+ switch (zzBufferL[zzCurrentPosL]) {
+ case '\u000B':
+ case '\u000C':
+ case '\u0085':
+ case '\u2028':
+ case '\u2029':
+ yyline++;
+ zzR = false;
+ break;
+ case '\r':
+ yyline++;
+ zzR = true;
+ break;
+ case '\n':
+ if (zzR)
+ zzR = false;
+ else {
+ yyline++;
+ }
+ break;
+ default:
+ zzR = false;
+ }
+ }
+
+ // the previous match ended with '\r': if the next character is '\n' the
+ // pair will be counted again with the next match, so undo one count now
+ if (zzR) {
+ // peek one character ahead if it is \n (if we have counted one line too much)
+ boolean zzPeek;
+ if (zzMarkedPosL < zzEndReadL)
+ zzPeek = zzBufferL[zzMarkedPosL] == '\n';
+ else if (zzAtEOF)
+ zzPeek = false;
+ else {
+ boolean eof = zzRefill();
+ // zzRefill may have moved the positions and replaced the buffer
+ zzEndReadL = zzEndRead;
+ zzMarkedPosL = zzMarkedPos;
+ zzBufferL = zzBuffer;
+ if (eof)
+ zzPeek = false;
+ else
+ zzPeek = zzBufferL[zzMarkedPosL] == '\n';
+ }
+ if (zzPeek) yyline--;
+ }
+ // -1 means "no accepting state seen yet" for the new match
+ zzAction = -1;
+
+ // the new match starts where the previous one ended
+ zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+
+ zzState = ZZ_LEXSTATE[zzLexicalState];
+
+
+ // run the DFA; leaving this labeled block ends the current match attempt
+ zzForAction: {
+ while (true) {
+
+ if (zzCurrentPosL < zzEndReadL)
+ zzInput = zzBufferL[zzCurrentPosL++];
+ else if (zzAtEOF) {
+ zzInput = YYEOF;
+ break zzForAction;
+ }
+ else {
+ // store back cached positions
+ zzCurrentPos = zzCurrentPosL;
+ zzMarkedPos = zzMarkedPosL;
+ boolean eof = zzRefill();
+ // get translated positions and possibly new buffer
+ zzCurrentPosL = zzCurrentPos;
+ zzMarkedPosL = zzMarkedPos;
+ zzBufferL = zzBuffer;
+ zzEndReadL = zzEndRead;
+ if (eof) {
+ zzInput = YYEOF;
+ break zzForAction;
+ }
+ else {
+ zzInput = zzBufferL[zzCurrentPosL++];
+ }
+ }
+ // transition lookup: row of the current state plus the character class of the input
+ int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
+ // -1: no transition for this input, stop and use the last accepting state
+ if (zzNext == -1) break zzForAction;
+ zzState = zzNext;
+
+ int zzAttributes = zzAttrL[zzState];
+ // bit 1 set: remember this state and position as the latest match;
+ // bit 8 set as well: stop scanning immediately
+ if ( (zzAttributes & 1) == 1 ) {
+ zzAction = zzState;
+ zzMarkedPosL = zzCurrentPosL;
+ if ( (zzAttributes & 8) == 8 ) break zzForAction;
+ }
+
+ }
+ }
+
+ // store back cached position
+ zzMarkedPos = zzMarkedPosL;
+
+ // dispatch on the action of the last accepting state; without one
+ // (zzAction < 0) the default branch handles end of input and errors.
+ // NOTE(review): yytext().length() creates a String per token and has the
+ // same value as yylength().
+ switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+ case 16:
+ { AMP t = new AMP(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 18: break;
+ case 13:
+ { QUESTION t = new QUESTION(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 19: break;
+ case 12:
+ { PERIOD t = new PERIOD(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 20: break;
+ case 7:
+ { NBSP t = new NBSP(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 21: break;
+ case 10:
+ { COLON t = new COLON(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 22: break;
+ case 11:
+ { COMMA t = new COMMA(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 23: break;
+ case 4:
+ { SPACE t = new SPACE(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 24: break;
+ case 9:
+ { SEMICOLON t = new SEMICOLON(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 25: break;
+ case 8:
+ { SW t = new SW(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 26: break;
+ case 1:
+ { SPECIAL t = new SPECIAL(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 27: break;
+ case 14:
+ { CAP t = new CAP(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 28: break;
+ case 17:
+ // the tag is removed from the tag map before the MARKUP token is created
+ { removeTag(yytext());
+ MARKUP t = new MARKUP(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 29: break;
+ case 15:
+ // the tag is added to the tag map before the MARKUP token is created
+ // NOTE(review): the local 'tag' is never read afterwards
+ { String tag = splitAndPutInMap(yytext());
+ MARKUP t = new MARKUP(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 30: break;
+ case 6:
+ { EXCLAMATION t = new EXCLAMATION(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 31: break;
+ case 3:
+ { NUM t = new NUM(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 32: break;
+ case 2:
+ { CW t = new CW(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 33: break;
+ case 5:
+ { BREAK t = new BREAK(cas);
+ t.setBegin(yychar);
+ t.setEnd(yychar + yytext().length());
+ t.setTags(tags);
+ return t;
+ }
+ case 34: break;
+ default:
+ // end of input with nothing consumed: mark EOF and return null;
+ // anything else without a matching action is a scanner error
+ if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+ zzAtEOF = true;
+ switch (zzLexicalState) {
+ case YYINITIAL: {
+ return null;
+ }
+ case 33: break;
+ default:
+ return null;
+ }
+ }
+ else {
+ zzScanError(ZZ_NO_MATCH);
+ }
+ }
+ }
+ }
+
+
+}
Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/SeedLexer.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/SeedLexer.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/SourceLexer.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/SourceLexer.java?rev=1152824&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/SourceLexer.java (added)
+++ uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/SourceLexer.java Mon Aug 1 15:24:44 2011
@@ -0,0 +1,805 @@
+/* The following code was generated by JFlex 1.4.1 on 21.12.06 21:50 */
+
+/* this is the scanner example from the JLex website
+ (with small modifications to make it more readable) */
+package org.apache.uima.tm.textmarker.seed;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.tm.type.AMP;
+import org.apache.uima.tm.type.BREAK;
+import org.apache.uima.tm.type.CAP;
+import org.apache.uima.tm.type.COLON;
+import org.apache.uima.tm.type.COMMA;
+import org.apache.uima.tm.type.CW;
+import org.apache.uima.tm.type.EXCLAMATION;
+import org.apache.uima.tm.type.MARKUP;
+import org.apache.uima.tm.type.NBSP;
+import org.apache.uima.tm.type.NUM;
+import org.apache.uima.tm.type.PERIOD;
+import org.apache.uima.tm.type.QUESTION;
+import org.apache.uima.tm.type.SEMICOLON;
+import org.apache.uima.tm.type.SPACE;
+import org.apache.uima.tm.type.SPECIAL;
+import org.apache.uima.tm.type.SW;
+
+
+/**
+ * This class is a scanner generated by <a href="http://www.jflex.de/">JFlex</a> 1.4.1 on 21.12.06
+ * 21:50 from the specification file
+ */
+public class SourceLexer {
+
+ /** This character denotes the end of file */
+ public static final int YYEOF = -1;
+
+ /** initial size of the lookahead buffer */
+ private static final int ZZ_BUFFERSIZE = 16384;
+
+ /** lexical states */
+ public static final int YYINITIAL = 0;
+
+ public static final int COMMENT = 1;
+
+ /**
+ * Translates characters to character classes
+ */
+ private static final char[] ZZ_CMAP = { 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 4, 0, 0, 4, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 23, 0, 0, 0, 0, 10, 0, 0, 0, 6, 0, 21, 0, 22, 5, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 20, 15, 7, 0, 8, 24, 0, 1, 17, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 16, 1, 19, 1, 1, 18, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 26, 12, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 11, 26, 14, 26, 26, 13, 26, 26, 26, 26, 26, 26, 26, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 0, 0, 0, 27, 0,
+ 0, 25, 0, 0, 0, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 0, 0, 0, 0, 0,
+ 25, 0, 0, 0 };
+
+ /**
+ * Translates DFA states to action switch labels.
+ */
+ private static final int[] ZZ_ACTION = zzUnpackAction();
+
+ private static final String ZZ_ACTION_PACKED_0 = "\1\0\1\1\1\2\1\3\1\4\1\5\1\6\2\2"
+ + "\1\7\1\2\1\10\1\11\1\12\1\13\1\14\1\15" + "\1\16\1\1\2\0\1\17\1\3\1\20\7\0\1\1"
+ + "\1\21\1\22\1\1\1\23\1\0\1\24\2\0\1\25" + "\3\0";
+
+ private static int[] zzUnpackAction() {
+ int[] result = new int[44];
+ int offset = 0;
+ offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int zzUnpackAction(String packed, int offset, int[] result) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int count = packed.charAt(i++);
+ int value = packed.charAt(i++);
+ do
+ result[j++] = value;
+ while (--count > 0);
+ }
+ return j;
+ }
+
+ /**
+ * Translates a state to a row index in the transition table
+ */
+ private static final int[] ZZ_ROWMAP = zzUnpackRowMap();
+
+ private static final String ZZ_ROWMAP_PACKED_0 = "\0\0\0\34\0\70\0\124\0\160\0\214\0\70\0\250"
+ + "\0\304\0\70\0\340\0\374\0\70\0\70\0\70\0\70"
+ + "\0\70\0\70\0\u0118\0\u0134\0\u0150\0\u016c\0\u0188\0\70"
+ + "\0\u01a4\0\u01c0\0\u01dc\0\u01f8\0\u0214\0\u0230\0\u024c\0\u0268"
+ + "\0\70\0\70\0\u0284\0\70\0\u02a0\0\70\0\u02bc\0\u02d8"
+ + "\0\70\0\u02f4\0\u0310\0\u032c";
+
+ private static int[] zzUnpackRowMap() {
+ int[] result = new int[44];
+ int offset = 0;
+ offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int zzUnpackRowMap(String packed, int offset, int[] result) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int high = packed.charAt(i++) << 16;
+ result[j++] = high | packed.charAt(i++);
+ }
+ return j;
+ }
+
+ /**
+ * The transition table of the DFA
+ */
+ private static final int[] ZZ_TRANS = zzUnpackTrans();
+
+ private static final String ZZ_TRANS_PACKED_0 = "\1\3\1\4\1\5\1\6\1\7\1\10\1\3\1\11"
+ + "\1\3\1\12\1\13\4\14\1\15\4\4\1\16\1\17" + "\1\20\1\21\1\22\2\14\1\4\5\23\1\24\1\25"
+ + "\25\23\35\0\1\26\11\0\4\27\1\0\4\26\5\0" + "\2\27\1\26\2\0\1\5\34\0\1\6\36\0\1\30"
+ + "\25\0\5\31\1\32\2\31\1\0\23\31\1\0\1\33" + "\11\0\1\34\3\33\1\0\1\35\3\33\6\0\1\33"
+ + "\14\0\4\14\12\0\2\14\1\0\5\23\1\36\1\37"
+ + "\32\23\1\40\1\41\32\23\1\42\1\43\25\23\1\0"
+ + "\1\26\16\0\4\26\7\0\1\26\13\0\4\27\12\0"
+ + "\2\27\1\0\10\31\1\44\23\31\10\45\1\44\23\45"
+ + "\1\0\1\33\11\0\4\33\1\46\4\33\6\0\1\33" + "\2\0\1\33\11\0\1\33\1\47\2\33\1\46\4\33"
+ + "\6\0\1\33\2\0\1\33\11\0\4\33\1\46\1\33" + "\1\50\2\33\6\0\1\33\1\0\5\23\1\40\1\0"
+ + "\32\23\1\0\1\43\32\23\1\40\1\37\32\23\1\36"
+ + "\1\43\25\23\10\45\1\51\23\45\1\0\1\33\11\0" + "\2\33\1\52\1\33\1\46\4\33\6\0\1\33\2\0"
+ + "\1\33\11\0\4\33\1\46\2\33\1\53\1\33\6\0" + "\1\33\2\0\1\33\11\0\3\33\1\54\1\46\4\33"
+ + "\6\0\1\33\2\0\1\33\11\0\4\33\1\46\3\33" + "\1\54\6\0\1\33\2\0\1\33\11\0\4\33\1\12"
+ + "\4\33\6\0\1\33\1\0";
+
+ private static int[] zzUnpackTrans() {
+ int[] result = new int[840];
+ int offset = 0;
+ offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int zzUnpackTrans(String packed, int offset, int[] result) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int count = packed.charAt(i++);
+ int value = packed.charAt(i++);
+ value--;
+ do
+ result[j++] = value;
+ while (--count > 0);
+ }
+ return j;
+ }
+
+ /* error codes */
+ private static final int ZZ_UNKNOWN_ERROR = 0;
+
+ private static final int ZZ_NO_MATCH = 1;
+
+ private static final int ZZ_PUSHBACK_2BIG = 2;
+
+ /* error messages for the codes above */
+ private static final String ZZ_ERROR_MSG[] = { "Unkown internal scanner error",
+ "Error: could not match input", "Error: pushback value was too large" };
+
+ /**
+ * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
+ */
+ private static final int[] ZZ_ATTRIBUTE = zzUnpackAttribute();
+
+ private static final String ZZ_ATTRIBUTE_PACKED_0 = "\1\0\1\1\1\11\3\1\1\11\2\1\1\11\2\1"
+ + "\6\11\1\1\2\0\2\1\1\11\7\0\1\1\2\11" + "\1\1\1\11\1\0\1\11\2\0\1\11\3\0";
+
+ private static int[] zzUnpackAttribute() {
+ int[] result = new int[44];
+ int offset = 0;
+ offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int zzUnpackAttribute(String packed, int offset, int[] result) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int count = packed.charAt(i++);
+ int value = packed.charAt(i++);
+ do
+ result[j++] = value;
+ while (--count > 0);
+ }
+ return j;
+ }
+
+ /** the input device */
+ private java.io.Reader zzReader;
+
+ /** the current state of the DFA */
+ private int zzState;
+
+ /** the current lexical state */
+ private int zzLexicalState = YYINITIAL;
+
+ /**
+ * this buffer contains the current text to be matched and is the source of the yytext() string
+ */
+ private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
+
+ /** the textposition at the last accepting state */
+ private int zzMarkedPos;
+
+ /** the textposition at the last state to be included in yytext */
+ private int zzPushbackPos;
+
+ /** the current text position in the buffer */
+ private int zzCurrentPos;
+
+ /** startRead marks the beginning of the yytext() string in the buffer */
+ private int zzStartRead;
+
+ /**
+ * endRead marks the last character in the buffer, that has been read from input
+ */
+ private int zzEndRead;
+
+ /** number of newlines encountered up to the start of the matched text */
+ private int yyline;
+
+ /** the number of characters up to the start of the matched text */
+ private int yychar;
+
+ /**
+ * the number of characters from the last newline up to the start of the matched text
+ */
+ private int yycolumn;
+
+ /**
+ * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+ */
+ private boolean zzAtBOL = true;
+
+ /** zzAtEOF == true <=> the scanner is at the EOF */
+ private boolean zzAtEOF;
+
+ /* user code: */
+ private int number = 0;
+
+ private int comment_count = 0;
+
+ private Map<String, String> tags = new HashMap<String, String>();
+
+ private JCas cas;
+
+ private final static Pattern tagPattern = Pattern.compile("<([A-Za-z�������_0-9:]+)([^>]*)>"); // <font
+
+ // color='red'>
+
+ // --> group(1)=font
+
+ /**
+  * Records an opening markup tag in the map of currently open tags. The tag name (group 1 of
+  * the tag pattern) becomes the lower-cased key, the remainder of the tag up to the closing
+  * bracket (group 2) becomes the value. Text that does not match the pattern is ignored.
+  *
+  * @param tag
+  *          the matched markup text, e.g. <code>&lt;font color='red'&gt;</code>
+  */
+ private void splitAndPutInMap(String tag) {
+   Matcher m = tagPattern.matcher(tag);
+   if (m.find()) {
+     // A fixed locale keeps the key independent of the JVM default locale; with a Turkish
+     // default, "TITLE" would otherwise be lower-cased to "t\u0131tle".
+     tags.put(m.group(1).toLowerCase(java.util.Locale.ENGLISH), m.group(2));
+   }
+ }
+
+ /**
+  * Removes the entry of a closing markup tag from the map of currently open tags. The key is
+  * derived exactly like in {@link #splitAndPutInMap(String)}: brackets are stripped, the name
+  * is lower-cased with the same fixed locale and then trimmed.
+  *
+  * @param closingTag
+  *          the matched closing tag, e.g. <code>&lt;/font&gt;</code>
+  */
+ private void removeTag(String closingTag) {
+   String cTag = closingTag.replace("</", "");
+
+   // must use the same locale as splitAndPutInMap, otherwise the keys could differ
+   cTag = cTag.replace(">", "").toLowerCase(java.util.Locale.ENGLISH);
+   tags.remove(cTag.trim());
+ }
+
+ // private String removeBrackets(String tag){
+ // String result = tag.replace("<","");
+ // return result.replace(">","").trim();
+ // }
+
+ /**
+  * Creates a scanner that reads its input from a character stream. A byte stream based
+  * variant of this constructor exists as well.
+  *
+  * @param in
+  *          the java.io.Reader to read input from.
+  * @param cas
+  *          the JCas that the annotations created by this scanner are constructed with
+  */
+ public SourceLexer(java.io.Reader in, JCas cas) {
+   this.cas = cas;
+   this.zzReader = in;
+ }
+
+ /**
+  * Creates a scanner that reads its input from a byte stream. The stream is wrapped in a
+  * java.io.InputStreamReader without an explicit charset and passed on to the reader based
+  * constructor.
+  *
+  * @param in
+  *          the java.io.Inputstream to read input from.
+  * @param cas
+  *          the JCas that the annotations created by this scanner are constructed with
+  */
+ SourceLexer(java.io.InputStream in, JCas cas) {
+   this(new java.io.InputStreamReader(in), cas);
+ }
+
+ /**
+  * Refills the input buffer. Already consumed text before <code>zzStartRead</code> is
+  * discarded first, the buffer is enlarged if the current position has reached its end, and
+  * then new characters are read from the input.
+  *
+  * @return <code>false</code>, iff there was new input; <code>true</code> if the end of the
+  *         input has been reached.
+  *
+  * @exception java.io.IOException
+  *              if any I/O-Error occurs
+  */
+ private boolean zzRefill() throws java.io.IOException {
+
+   /* first: make room (if you can) */
+   if (zzStartRead > 0) {
+     System.arraycopy(zzBuffer, zzStartRead, zzBuffer, 0, zzEndRead - zzStartRead);
+
+     /* translate stored positions */
+     zzEndRead -= zzStartRead;
+     zzCurrentPos -= zzStartRead;
+     zzMarkedPos -= zzStartRead;
+     zzPushbackPos -= zzStartRead;
+     zzStartRead = 0;
+   }
+
+   /* is the buffer big enough? */
+   if (zzCurrentPos >= zzBuffer.length) {
+     /* if not: blow it up */
+     char newBuffer[] = new char[zzCurrentPos * 2];
+     System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
+     zzBuffer = newBuffer;
+   }
+
+   /* finally: fill the buffer with new input */
+   int numRead = zzReader.read(zzBuffer, zzEndRead, zzBuffer.length - zzEndRead);
+
+   if (numRead > 0) {
+     zzEndRead += numRead;
+     return false;
+   }
+
+   if (numRead == 0) {
+     // Unlikely but not impossible: nothing was transferred although the stream has not ended.
+     // Reporting "new input" here would make the caller consume a stale buffer slot, so force
+     // progress with a single-character read instead.
+     int c = zzReader.read();
+     if (c == -1) {
+       return true;
+     }
+     zzBuffer[zzEndRead++] = (char) c;
+     return false;
+   }
+
+   /* numRead < 0: end of input */
+   return true;
+ }
+
+ /**
+  * Marks the scanner as being at the end of input, invalidates the buffered text and closes
+  * the underlying reader if there is one.
+  *
+  * @exception java.io.IOException
+  *              if closing the reader fails
+  */
+ public final void yyclose() throws java.io.IOException {
+   /* indicate end of file */
+   zzAtEOF = true;
+   /* invalidate buffer */
+   zzEndRead = zzStartRead;
+
+   if (zzReader == null) {
+     return;
+   }
+   zzReader.close();
+ }
+
+ /**
+  * Points the scanner at a new input stream without closing the previous reader.
+  *
+  * All buffer positions and the line, character and column counters are set back to zero, so
+  * text buffered from the old stream is lost and the old stream <b>cannot</b> be reused. The
+  * lexical state becomes <tt>YYINITIAL</tt>.
+  *
+  * @param reader
+  *          the new input stream
+  */
+ public final void yyreset(java.io.Reader reader) {
+   zzReader = reader;
+
+   zzAtBOL = true;
+   zzAtEOF = false;
+
+   zzStartRead = 0;
+   zzEndRead = 0;
+
+   zzPushbackPos = 0;
+   zzMarkedPos = 0;
+   zzCurrentPos = 0;
+
+   yycolumn = 0;
+   yychar = 0;
+   yyline = 0;
+
+   zzLexicalState = YYINITIAL;
+ }
+
+ /**
+  * Reports the lexical state the scanner is currently in.
+  *
+  * @return the current lexical state
+  */
+ public final int yystate() {
+   return this.zzLexicalState;
+ }
+
+ /**
+  * Switches the scanner to another lexical state.
+  *
+  * @param newState
+  *          the lexical state to enter
+  */
+ public final void yybegin(int newState) {
+   this.zzLexicalState = newState;
+ }
+
+ /**
+  * Copies the text matched by the current regular expression out of the buffer.
+  *
+  * @return the matched text as a new string
+  */
+ public final String yytext() {
+   final int matchedLength = zzMarkedPos - zzStartRead;
+   return new String(zzBuffer, zzStartRead, matchedLength);
+ }
+
+ /**
+  * Reads a single character of the matched text directly from the buffer.
+  *
+  * The result equals yytext().charAt(pos), but no string is created.
+  *
+  * @param pos
+  *          the position of the character to fetch. A value from 0 to yylength()-1.
+  *
+  * @return the character at position pos
+  */
+ public final char yycharat(int pos) {
+   final int bufferIndex = zzStartRead + pos;
+   return zzBuffer[bufferIndex];
+ }
+
+ /**
+  * Computes the number of characters in the matched text region.
+  *
+  * @return the distance between the start of the match and the marked position
+  */
+ public final int yylength() {
+   final int matchedLength = zzMarkedPos - zzStartRead;
+   return matchedLength;
+ }
+
+ /**
+ * Reports an error that occured while scanning.
+ *
+ * In a wellformed scanner (no or only correct usage of yypushback(int) and a match-all fallback
+ * rule) this method will only be called with things that "Can't Possibly Happen". If this method
+ * is called, something is seriously wrong (e.g. a JFlex bug producing a faulty scanner etc.).
+ *
+ * Usual syntax/scanner level error handling should be done in error fallback rules.
+ *
+ * @param errorCode
+ * the code of the errormessage to display
+ */
+ private void zzScanError(int errorCode) {
+ String message;
+ try {
+ message = ZZ_ERROR_MSG[errorCode];
+ } catch (ArrayIndexOutOfBoundsException e) {
+ message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+ }
+
+ throw new Error(message);
+ }
+
+ /**
+  * Returns the given number of characters of the current match to the input.
+  *
+  * They are scanned again by the next call of the scanning method.
+  *
+  * @param number
+  *          the number of characters to be read again. This number must not be greater than
+  *          yylength(); a larger value is reported as a scan error.
+  */
+ public void yypushback(int number) {
+   final int matchedLength = yylength();
+   if (number > matchedLength) {
+     zzScanError(ZZ_PUSHBACK_2BIG);
+   }
+
+   zzMarkedPos = zzMarkedPos - number;
+ }
+
+ /**
+  * Resumes scanning until the next regular expression is matched, the end of input is encountered
+  * or an I/O-Error occurs.
+  *
+  * Each token action creates an annotation of the matching type whose begin is the character
+  * offset of the match and whose end is that offset plus the length of the matched text, and
+  * hands it the map of currently open tags. Actions that create no annotation (comment
+  * bookkeeping, the empty action) let the scan continue with the next match.
+  *
+  * @return the next token, or <code>null</code> at the end of input. <code>null</code> is also
+  *         returned as soon as a character is read that lies outside the range covered by the
+  *         character class table.
+  * @exception java.io.IOException
+  *              if any I/O-Error occurs
+  */
+ public Annotation yylex() throws java.io.IOException {
+   int zzInput;
+   int zzAction;
+
+   // cached fields:
+   int zzCurrentPosL;
+   int zzMarkedPosL;
+   int zzEndReadL = zzEndRead;
+   char[] zzBufferL = zzBuffer;
+   char[] zzCMapL = ZZ_CMAP;
+
+   int[] zzTransL = ZZ_TRANS;
+   int[] zzRowMapL = ZZ_ROWMAP;
+   int[] zzAttrL = ZZ_ATTRIBUTE;
+
+   while (true) {
+     zzMarkedPosL = zzMarkedPos;
+
+     // advance the character offset by the length of the previous match
+     yychar += zzMarkedPosL - zzStartRead;
+
+     // count the line terminators contained in the previous match; zzR remembers a trailing \r
+     boolean zzR = false;
+     for (zzCurrentPosL = zzStartRead; zzCurrentPosL < zzMarkedPosL; zzCurrentPosL++) {
+       switch (zzBufferL[zzCurrentPosL]) {
+         case '\u000B':
+         case '\u000C':
+         case '\u0085':
+         case '\u2028':
+         case '\u2029':
+           yyline++;
+           zzR = false;
+           break;
+         case '\r':
+           yyline++;
+           zzR = true;
+           break;
+         case '\n':
+           if (zzR)
+             zzR = false;
+           else {
+             yyline++;
+           }
+           break;
+         default:
+           zzR = false;
+       }
+     }
+
+     if (zzR) {
+       // peek one character ahead if it is \n (if we have counted one
+       // line too much)
+       boolean zzPeek;
+       if (zzMarkedPosL < zzEndReadL)
+         zzPeek = zzBufferL[zzMarkedPosL] == '\n';
+       else if (zzAtEOF)
+         zzPeek = false;
+       else {
+         boolean eof = zzRefill();
+         zzEndReadL = zzEndRead;
+         zzMarkedPosL = zzMarkedPos;
+         zzBufferL = zzBuffer;
+         if (eof)
+           zzPeek = false;
+         else
+           zzPeek = zzBufferL[zzMarkedPosL] == '\n';
+       }
+       if (zzPeek)
+         yyline--;
+     }
+     zzAction = -1;
+
+     zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+
+     zzState = zzLexicalState;
+
+     zzForAction: {
+       while (true) {
+
+         if (zzCurrentPosL < zzEndReadL)
+           zzInput = zzBufferL[zzCurrentPosL++];
+         else if (zzAtEOF) {
+           zzInput = YYEOF;
+           break zzForAction;
+         } else {
+           // store back cached positions
+           zzCurrentPos = zzCurrentPosL;
+           zzMarkedPos = zzMarkedPosL;
+           boolean eof = zzRefill();
+           // get translated positions and possibly new buffer
+           zzCurrentPosL = zzCurrentPos;
+           zzMarkedPosL = zzMarkedPos;
+           zzBufferL = zzBuffer;
+           zzEndReadL = zzEndRead;
+           if (eof) {
+             zzInput = YYEOF;
+             break zzForAction;
+           } else {
+             zzInput = zzBufferL[zzCurrentPosL++];
+           }
+         }
+         int i = zzRowMapL[zzState];
+         if (zzInput >= zzCMapL.length) {
+           // The character has no entry in the character class table: scanning stops here and
+           // the caller receives null, exactly as at the end of input.
+           return null;
+         }
+         char c = zzCMapL[zzInput];
+         int zzNext = zzTransL[i + c];
+         if (zzNext == -1)
+           break zzForAction;
+         zzState = zzNext;
+
+         int zzAttributes = zzAttrL[zzState];
+         if ((zzAttributes & 1) == 1) {
+           zzAction = zzState;
+           zzMarkedPosL = zzCurrentPosL;
+           if ((zzAttributes & 8) == 8)
+             break zzForAction;
+         }
+
+       }
+     }
+
+     // store back cached position
+     zzMarkedPos = zzMarkedPosL;
+
+     switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+       case 8: {
+         SW t = new SW(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 22:
+         break;
+       case 11: {
+         COMMA t = new COMMA(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 23:
+         break;
+       case 5: {
+         SPACE t = new SPACE(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 24:
+         break;
+       case 13: {
+         EXCLAMATION t = new EXCLAMATION(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 25:
+         break;
+       case 3: {
+         CW t = new CW(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 26:
+         break;
+       case 21: {
+         // closing tag: drop it from the open tags before creating the annotation
+         removeTag(yytext());
+         MARKUP t = new MARKUP(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 27:
+         break;
+       case 19: {
+         // opening tag: register it in the open tags before creating the annotation
+         splitAndPutInMap(yytext());
+         MARKUP t = new MARKUP(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 28:
+         break;
+       case 16: {
+         // no annotation: enter the COMMENT state and continue scanning
+         yybegin(COMMENT);
+         comment_count++;
+       }
+       case 29:
+         break;
+       case 20: {
+         AMP t = new AMP(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 30:
+         break;
+       case 18: {
+         // no annotation: leave the COMMENT state once every counted comment is closed
+         if (--comment_count == 0)
+           yybegin(YYINITIAL);
+       }
+       case 31:
+         break;
+       case 7: {
+         NBSP t = new NBSP(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 32:
+         break;
+       case 15: {
+         CAP t = new CAP(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 33:
+         break;
+       case 12: {
+         PERIOD t = new PERIOD(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 34:
+         break;
+       case 10: {
+         COLON t = new COLON(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 35:
+         break;
+       case 6: {
+         BREAK t = new BREAK(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 36:
+         break;
+       case 9: {
+         SEMICOLON t = new SEMICOLON(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 37:
+         break;
+       case 4: {
+         NUM t = new NUM(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 38:
+         break;
+       case 17: {
+         // no annotation: one more comment opened while already in a comment
+         comment_count++;
+       }
+       case 39:
+         break;
+       case 2: {
+         SPECIAL t = new SPECIAL(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 40:
+         break;
+       case 14: {
+         QUESTION t = new QUESTION(cas);
+         t.setBegin(yychar);
+         t.setEnd(yychar + yytext().length());
+         t.setTags(tags);
+         return t;
+       }
+       case 41:
+         break;
+       case 1: {
+         // empty action: the match is skipped
+       }
+       case 42:
+         break;
+       default:
+         if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+           zzAtEOF = true;
+           // end of input: no annotation is created in any lexical state
+           switch (zzLexicalState) {
+             case YYINITIAL: {
+               return null;
+             }
+             case 45:
+               break;
+             case COMMENT: {
+               return null;
+             }
+             case 46:
+               break;
+             default:
+               return null;
+           }
+         } else {
+           zzScanError(ZZ_NO_MATCH);
+         }
+     }
+   }
+ }
+
+}
Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/SourceLexer.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/SourceLexer.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/TextMarkerAnnotationSeeder.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/TextMarkerAnnotationSeeder.java?rev=1152824&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/TextMarkerAnnotationSeeder.java (added)
+++ uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/TextMarkerAnnotationSeeder.java Mon Aug 1 15:24:44 2011
@@ -0,0 +1,9 @@
+package org.apache.uima.tm.textmarker.seed;
+
+import org.apache.uima.cas.CAS;
+
+/**
+ * Contract for components that are given a text and a CAS to work on.
+ *
+ * NOTE(review): judging by the name, implementations presumably add the initial ("seed")
+ * annotations for the text to the CAS - confirm against the implementing classes.
+ */
+public interface TextMarkerAnnotationSeeder {
+
+ /**
+  * Processes the given text for the given CAS.
+  *
+  * @param text
+  *          the text to process; presumably the document text of <code>cas</code> - TODO confirm
+  * @param cas
+  *          the CAS handed to the seeder
+  */
+ void seed(String text, CAS cas);
+
+}
Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/TextMarkerAnnotationSeeder.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/seed/TextMarkerAnnotationSeeder.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/utils/UIMAUtils.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/utils/UIMAUtils.java?rev=1152824&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/utils/UIMAUtils.java (added)
+++ uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/utils/UIMAUtils.java Mon Aug 1 15:24:44 2011
@@ -0,0 +1,52 @@
+package org.apache.uima.tm.textmarker.utils;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.DoubleArray;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.jcas.cas.IntegerArray;
+import org.apache.uima.jcas.cas.StringArray;
+
+/**
+ * Static helpers that convert between Java collections/arrays and the array types of a JCas.
+ */
+public class UIMAUtils {
+
+  /**
+   * Creates an FSArray holding the elements of the given list in list order.
+   *
+   * @param jCas
+   *          the JCas the new array is created in
+   * @param fsList
+   *          the feature structures to copy; must not be <code>null</code>
+   * @return a new FSArray with as many elements as the list
+   */
+  public static FSArray toFSArray(JCas jCas, List<? extends FeatureStructure> fsList) {
+    final int size = fsList.size();
+    final FSArray target = new FSArray(jCas, size);
+    final FeatureStructure[] elements = fsList.toArray(new FeatureStructure[size]);
+    target.copyFromArray(elements, 0, 0, size);
+    return target;
+  }
+
+  /**
+   * Creates a StringArray holding a copy of the given strings.
+   *
+   * @param jCas
+   *          the JCas the new array is created in
+   * @param sArray
+   *          the values to copy; must not be <code>null</code>
+   * @return a new StringArray of the same length
+   */
+  public static StringArray toStringArray(JCas jCas, String[] sArray) {
+    final int size = sArray.length;
+    final StringArray target = new StringArray(jCas, size);
+    target.copyFromArray(sArray, 0, 0, size);
+    return target;
+  }
+
+  /**
+   * Creates a DoubleArray holding a copy of the given doubles.
+   *
+   * @param jCas
+   *          the JCas the new array is created in
+   * @param sArray
+   *          the values to copy; must not be <code>null</code>
+   * @return a new DoubleArray of the same length
+   */
+  public static DoubleArray toDoubleArray(JCas jCas, double[] sArray) {
+    final int size = sArray.length;
+    final DoubleArray target = new DoubleArray(jCas, size);
+    target.copyFromArray(sArray, 0, 0, size);
+    return target;
+  }
+
+  /**
+   * Creates an IntegerArray holding a copy of the given ints.
+   *
+   * @param jCas
+   *          the JCas the new array is created in
+   * @param sArray
+   *          the values to copy; must not be <code>null</code>
+   * @return a new IntegerArray of the same length
+   */
+  public static IntegerArray toIntegerArray(JCas jCas, int[] sArray) {
+    final int size = sArray.length;
+    final IntegerArray target = new IntegerArray(jCas, size);
+    target.copyFromArray(sArray, 0, 0, size);
+    return target;
+  }
+
+  /**
+   * Copies the elements of an FSArray into a new list, casting each one to the given class.
+   *
+   * @param fsArray
+   *          the array to read; <code>null</code> yields an empty list
+   * @param cls
+   *          the class every element is cast to
+   * @return a new mutable list with the elements in array order
+   */
+  public static <T extends FeatureStructure> List<T> toList(FSArray fsArray, Class<T> cls) {
+    final List<T> result = new ArrayList<T>();
+    if (fsArray != null) {
+      final FeatureStructure[] elements = fsArray.toArray();
+      for (int i = 0; i < elements.length; i++) {
+        result.add(cls.cast(elements[i]));
+      }
+    }
+    return result;
+  }
+
+}
Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/utils/UIMAUtils.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/utils/UIMAUtils.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/verbalize/ActionVerbalizer.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/verbalize/ActionVerbalizer.java?rev=1152824&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/verbalize/ActionVerbalizer.java (added)
+++ uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/verbalize/ActionVerbalizer.java Mon Aug 1 15:24:44 2011
@@ -0,0 +1,325 @@
+package org.apache.uima.tm.textmarker.verbalize;
+
+import java.util.Map.Entry;
+
+import org.apache.uima.tm.textmarker.action.AbstractTextMarkerAction;
+import org.apache.uima.tm.textmarker.action.AddAction;
+import org.apache.uima.tm.textmarker.action.AssignAction;
+import org.apache.uima.tm.textmarker.action.CallAction;
+import org.apache.uima.tm.textmarker.action.ColorAction;
+import org.apache.uima.tm.textmarker.action.CreateAction;
+import org.apache.uima.tm.textmarker.action.DelAction;
+import org.apache.uima.tm.textmarker.action.ExpandAction;
+import org.apache.uima.tm.textmarker.action.FillAction;
+import org.apache.uima.tm.textmarker.action.FilterMarkupAction;
+import org.apache.uima.tm.textmarker.action.FilterTypeAction;
+import org.apache.uima.tm.textmarker.action.GatherAction;
+import org.apache.uima.tm.textmarker.action.GetAction;
+import org.apache.uima.tm.textmarker.action.GetFeatureAction;
+import org.apache.uima.tm.textmarker.action.LogAction;
+import org.apache.uima.tm.textmarker.action.MarkAction;
+import org.apache.uima.tm.textmarker.action.MarkFastAction;
+import org.apache.uima.tm.textmarker.action.MarkLastAction;
+import org.apache.uima.tm.textmarker.action.MarkOnceAction;
+import org.apache.uima.tm.textmarker.action.MergeAction;
+import org.apache.uima.tm.textmarker.action.RemoveAction;
+import org.apache.uima.tm.textmarker.action.RemoveDuplicateAction;
+import org.apache.uima.tm.textmarker.action.ReplaceAction;
+import org.apache.uima.tm.textmarker.action.RetainMarkupAction;
+import org.apache.uima.tm.textmarker.action.RetainTypeAction;
+import org.apache.uima.tm.textmarker.action.SetFeatureAction;
+import org.apache.uima.tm.textmarker.action.TransferAction;
+import org.apache.uima.tm.textmarker.action.TrieAction;
+import org.apache.uima.tm.textmarker.action.UnmarkAction;
+import org.apache.uima.tm.textmarker.kernel.expression.TextMarkerExpression;
+import org.apache.uima.tm.textmarker.kernel.expression.string.StringExpression;
+
+
+/**
+ * Renders {@link AbstractTextMarkerAction} instances as script-like text.
+ *
+ * <p>
+ * {@link #verbalizeName(AbstractTextMarkerAction)} produces only the action keyword, while
+ * {@link #verbalize(AbstractTextMarkerAction)} produces the keyword together with its argument
+ * list. Types and expressions that occur as arguments are rendered by the
+ * {@link TextMarkerVerbalizer} supplied at construction time.
+ */
+public class ActionVerbalizer {
+
+  /** Delegate used to render types, expressions and expression lists. */
+  private final TextMarkerVerbalizer verbalizer;
+
+  /**
+   * Creates an action verbalizer.
+   *
+   * @param verbalizer
+   *          the delegate used for every nested type or expression
+   */
+  public ActionVerbalizer(TextMarkerVerbalizer verbalizer) {
+    super();
+    this.verbalizer = verbalizer;
+  }
+
+  /**
+   * Returns the keyword of the given action without any arguments.
+   *
+   * @param action
+   *          the action to name; must not be {@code null}
+   * @return the upper-case keyword, or the simple class name for an action type that is not
+   *         handled here
+   */
+  public String verbalizeName(AbstractTextMarkerAction action) {
+    if (action instanceof AddAction) {
+      return "ADD";
+    }
+    if (action instanceof AssignAction) {
+      return "ASSIGN";
+    }
+    if (action instanceof CallAction) {
+      return "CALL";
+    }
+    if (action instanceof ColorAction) {
+      return "COLOR";
+    }
+    if (action instanceof CreateAction) {
+      return "CREATE";
+    }
+    if (action instanceof DelAction) {
+      return "DEL";
+    }
+    if (action instanceof FillAction) {
+      return "FILL";
+    }
+    if (action instanceof FilterMarkupAction) {
+      return "FILTERMARKUP";
+    }
+    if (action instanceof FilterTypeAction) {
+      return "FILTERTYPE";
+    }
+    if (action instanceof LogAction) {
+      return "LOG";
+    }
+    // MARKONCE and EXPAND are tested before MARK; keep this order.
+    // NOTE(review): presumably both are subclasses of MarkAction - confirm.
+    if (action instanceof MarkOnceAction) {
+      return "MARKONCE";
+    }
+    if (action instanceof ExpandAction) {
+      return "EXPAND";
+    }
+    if (action instanceof MarkAction) {
+      // A MarkAction that carries a score is written with the MARKSCORE keyword.
+      return ((MarkAction) action).getScore() != null ? "MARKSCORE" : "MARK";
+    }
+    if (action instanceof MarkFastAction) {
+      return "MARKFAST";
+    }
+    if (action instanceof MarkLastAction) {
+      return "MARKLAST";
+    }
+    if (action instanceof ReplaceAction) {
+      return "REPLACE";
+    }
+    if (action instanceof RetainMarkupAction) {
+      return "RETAINMARKUP";
+    }
+    if (action instanceof RetainTypeAction) {
+      return "RETAINTYPE";
+    }
+    if (action instanceof SetFeatureAction) {
+      return "SETFEATURE";
+    }
+    if (action instanceof GetFeatureAction) {
+      return "GETFEATURE";
+    }
+    if (action instanceof UnmarkAction) {
+      return "UNMARK";
+    }
+    if (action instanceof TransferAction) {
+      return "TRANSFER";
+    }
+    if (action instanceof TrieAction) {
+      return "TRIE";
+    }
+    if (action instanceof GatherAction) {
+      return "GATHER";
+    }
+    if (action instanceof MergeAction) {
+      return "MERGE";
+    }
+    if (action instanceof GetAction) {
+      return "GET";
+    }
+    if (action instanceof RemoveAction) {
+      return "REMOVE";
+    }
+    if (action instanceof RemoveDuplicateAction) {
+      return "REMOVEDUPLICATE";
+    }
+    return action.getClass().getSimpleName();
+  }
+
+  /**
+   * Returns the keyword of the given action followed by its verbalized arguments.
+   *
+   * @param action
+   *          the action to render; must not be {@code null}
+   * @return the textual form of the action, or the simple class name for an action type that is
+   *         not handled here
+   */
+  public String verbalize(AbstractTextMarkerAction action) {
+    if (action instanceof AssignAction) {
+      AssignAction a = (AssignAction) action;
+      return "ASSIGN(" + a.getVar() + "," + verbalizer.verbalize(a.getExpression()) + ")";
+    } else if (action instanceof CallAction) {
+      CallAction a = (CallAction) action;
+      return "CALL(" + a.getNamespace() + ")";
+    } else if (action instanceof ColorAction) {
+      ColorAction a = (ColorAction) action;
+      return "COLOR(" + verbalizer.verbalize(a.getType()) + ","
+              + verbalizer.verbalize(a.getBgColor()) + "," + verbalizer.verbalize(a.getFgColor())
+              + "," + verbalizer.verbalize(a.getSelected()) + ")";
+    } else if (action instanceof CreateAction) {
+      return verbalizeCreate((CreateAction) action);
+    } else if (action instanceof GatherAction) {
+      return verbalizeGather((GatherAction) action);
+    } else if (action instanceof DelAction) {
+      return "DEL";
+    } else if (action instanceof FillAction) {
+      return verbalizeFill((FillAction) action);
+    } else if (action instanceof FilterMarkupAction) {
+      FilterMarkupAction a = (FilterMarkupAction) action;
+      return a.getMarkup().isEmpty() ? "FILTERMARKUP" : "FILTERMARKUP("
+              + verbalizer.verbalizeExpressionList(a.getMarkup()) + ")";
+    } else if (action instanceof FilterTypeAction) {
+      FilterTypeAction a = (FilterTypeAction) action;
+      return a.getList().isEmpty() ? "FILTERTYPE" : "FILTERTYPE("
+              + verbalizer.verbalizeExpressionList(a.getList()) + ")";
+    } else if (action instanceof LogAction) {
+      LogAction a = (LogAction) action;
+      return "LOG(" + verbalizer.verbalize(a.getText()) + "," + a.getLevel() + ")";
+    } else if (action instanceof MarkOnceAction) {
+      // Tested before MarkAction on purpose; see verbalizeName.
+      MarkOnceAction a = (MarkOnceAction) action;
+      String indexes = "";
+      if (a.getList() != null && !a.getList().isEmpty()) {
+        indexes = "," + verbalizer.verbalizeExpressionList(a.getList());
+      }
+      return "MARKONCE(" + verbalizer.verbalize(a.getType()) + indexes + ")";
+    } else if (action instanceof ExpandAction) {
+      ExpandAction a = (ExpandAction) action;
+      String indexes = "";
+      if (a.getList() != null && !a.getList().isEmpty()) {
+        indexes = "," + verbalizer.verbalizeExpressionList(a.getList());
+      }
+      return "EXPAND(" + verbalizer.verbalize(a.getType()) + indexes + ")";
+    } else if (action instanceof MarkAction) {
+      return verbalizeMark((MarkAction) action);
+    } else if (action instanceof MarkFastAction) {
+      MarkFastAction a = (MarkFastAction) action;
+      return "MARKFAST(" + verbalizer.verbalize(a.getType()) + ","
+              + verbalizer.verbalize(a.getList()) + ")";
+    } else if (action instanceof MarkLastAction) {
+      MarkLastAction a = (MarkLastAction) action;
+      return "MARKLAST(" + verbalizer.verbalize(a.getType()) + ")";
+    } else if (action instanceof ReplaceAction) {
+      ReplaceAction a = (ReplaceAction) action;
+      return "REPLACE(" + verbalizer.verbalize(a.getReplacement()) + ")";
+    } else if (action instanceof RetainMarkupAction) {
+      RetainMarkupAction a = (RetainMarkupAction) action;
+      return a.getMarkup().isEmpty() ? "RETAINMARKUP" : "RETAINMARKUP("
+              + verbalizer.verbalizeExpressionList(a.getMarkup()) + ")";
+    } else if (action instanceof RetainTypeAction) {
+      RetainTypeAction a = (RetainTypeAction) action;
+      return a.getList().isEmpty() ? "RETAINTYPE" : "RETAINTYPE("
+              + verbalizer.verbalizeExpressionList(a.getList()) + ")";
+    } else if (action instanceof SetFeatureAction) {
+      return verbalizeSetFeature((SetFeatureAction) action);
+    } else if (action instanceof GetFeatureAction) {
+      GetFeatureAction a = (GetFeatureAction) action;
+      return "GETFEATURE(" + verbalizer.verbalize(a.getFeatureStringExpression()) + ","
+              + a.getVariable() + ")";
+    } else if (action instanceof UnmarkAction) {
+      UnmarkAction a = (UnmarkAction) action;
+      return "UNMARK(" + verbalizer.verbalize(a.getType()) + ")";
+    } else if (action instanceof TransferAction) {
+      TransferAction a = (TransferAction) action;
+      return "TRANSFER(" + verbalizer.verbalize(a.getType()) + ")";
+    } else if (action instanceof TrieAction) {
+      return verbalizeTrie((TrieAction) action);
+    } else if (action instanceof AddAction) {
+      AddAction a = (AddAction) action;
+      return "ADD(" + a.getListExpr() + "," + verbalizer.verbalizeExpressionList(a.getElements())
+              + ")";
+    } else if (action instanceof RemoveAction) {
+      RemoveAction a = (RemoveAction) action;
+      return "REMOVE(" + a.getListExpr() + ","
+              + verbalizer.verbalizeExpressionList(a.getElements()) + ")";
+    } else if (action instanceof RemoveDuplicateAction) {
+      // Must test RemoveDuplicateAction here: a second RemoveAction test could never match.
+      RemoveDuplicateAction a = (RemoveDuplicateAction) action;
+      return "REMOVEDUPLICATE(" + a.getListExpr() + ")";
+    } else if (action instanceof MergeAction) {
+      MergeAction a = (MergeAction) action;
+      return "MERGE(" + verbalizer.verbalize(a.getUnion()) + "," + a.getTarget() + ","
+              + verbalizer.verbalizeExpressionList(a.getLists()) + ")";
+    } else if (action instanceof GetAction) {
+      GetAction a = (GetAction) action;
+      return "GET(" + verbalizer.verbalize(a.getListExpr()) + "," + a.getVar() + ","
+              + verbalizer.verbalize(a.getOpExpr()) + ")";
+    }
+
+    return action.getClass().getSimpleName();
+  }
+
+  /** Renders a CREATE action as {@code CREATE(Type[, indexes][,feature=value]*)}. */
+  private String verbalizeCreate(CreateAction a) {
+    StringBuilder text = new StringBuilder("CREATE(");
+    text.append(verbalizer.verbalize(a.getStructureType()));
+    // Every optional section brings its own leading separator, so an absent section never
+    // leaves an empty argument or a dangling comma behind.
+    if (a.getIndexes() != null && !a.getIndexes().isEmpty()) {
+      text.append(", ").append(verbalizer.verbalizeExpressionList(a.getIndexes()));
+    }
+    if (a.getFeatures() != null) {
+      for (Entry<StringExpression, TextMarkerExpression> each : a.getFeatures().entrySet()) {
+        text.append(",");
+        text.append(verbalizer.verbalize(each.getKey()));
+        text.append("=");
+        text.append(verbalizer.verbalize(each.getValue()));
+      }
+    }
+    return text.append(")").toString();
+  }
+
+  /** Renders a GATHER action as {@code GATHER(Type[, indexes][, feature=value]*)}. */
+  private String verbalizeGather(GatherAction a) {
+    StringBuilder text = new StringBuilder("GATHER(");
+    text.append(verbalizer.verbalize(a.getStructureType()));
+    // Same scheme as CREATE: separators are emitted only in front of sections that exist.
+    if (a.getIndexes() != null && !a.getIndexes().isEmpty()) {
+      text.append(", ").append(verbalizer.verbalizeExpressionList(a.getIndexes()));
+    }
+    if (a.getFeatures() != null) {
+      for (StringExpression each : a.getFeatures().keySet()) {
+        text.append(", ");
+        text.append(verbalizer.verbalize(each));
+        text.append("=");
+        text.append(verbalizer.verbalize(a.getFeatures().get(each)));
+      }
+    }
+    return text.append(")").toString();
+  }
+
+  /** Renders a FILL action as {@code FILL(Type[,feature=value]*)}. */
+  private String verbalizeFill(FillAction a) {
+    StringBuilder text = new StringBuilder("FILL(");
+    text.append(verbalizer.verbalize(a.getStructureType()));
+    if (a.getFeatures() != null) {
+      for (StringExpression each : a.getFeatures().keySet()) {
+        text.append(",");
+        text.append(verbalizer.verbalize(each));
+        text.append("=");
+        text.append(verbalizer.verbalize(a.getFeatures().get(each)));
+      }
+    }
+    return text.append(")").toString();
+  }
+
+  /** Renders a MarkAction as {@code MARK(Type[,indexes])} or {@code MARKSCORE(score,Type[,indexes])}. */
+  private String verbalizeMark(MarkAction a) {
+    String indexes = "";
+    if (a.getList() != null && !a.getList().isEmpty()) {
+      indexes = "," + verbalizer.verbalizeExpressionList(a.getList());
+    }
+    String type = verbalizer.verbalize(a.getType());
+    if (a.getScore() == null) {
+      return "MARK(" + type + indexes + ")";
+    }
+    String score = verbalizer.verbalize(a.getScore());
+    if (!"".equals(score)) {
+      // Only separate the score from the type when the score produced any text.
+      score += ",";
+    }
+    return "MARKSCORE(" + score + type + indexes + ")";
+  }
+
+  /** Renders a SETFEATURE action as {@code SETFEATURE(feature,value)}. */
+  private String verbalizeSetFeature(SetFeatureAction a) {
+    String feature = verbalizer.verbalize(a.getFeatureStringExpression());
+    // The first non-null value expression wins: boolean, then number, then string.
+    String value = "";
+    if (a.getBooleanExpr() != null) {
+      value = verbalizer.verbalize(a.getBooleanExpr());
+    } else if (a.getNumberExpr() != null) {
+      value = verbalizer.verbalize(a.getNumberExpr());
+    } else if (a.getStringExpr() != null) {
+      value = verbalizer.verbalize(a.getStringExpr());
+    }
+    // The keyword is emitted unconditionally so the parentheses stay balanced even when no
+    // value expression is set.
+    return "SETFEATURE(" + feature + "," + value + ")";
+  }
+
+  /** Renders a TRIE action as {@code TRIE([key=value,]*list,ignoreCase,ignoreLength,edit,distance,ignoreChar)}. */
+  private String verbalizeTrie(TrieAction a) {
+    StringBuilder text = new StringBuilder("TRIE(");
+    if (a.getMap() != null) {
+      for (StringExpression each : a.getMap().keySet()) {
+        // Each mapping ends with its own separator, so the list argument follows directly.
+        text.append(verbalizer.verbalize(each));
+        text.append("=");
+        text.append(verbalizer.verbalize(a.getMap().get(each)));
+        text.append(",");
+      }
+    }
+    text.append(verbalizer.verbalize(a.getList())).append(",");
+    text.append(verbalizer.verbalize(a.getIgnoreCase())).append(",");
+    text.append(verbalizer.verbalize(a.getIgnoreLength())).append(",");
+    text.append(verbalizer.verbalize(a.getEdit())).append(",");
+    text.append(verbalizer.verbalize(a.getDistance())).append(",");
+    text.append(verbalizer.verbalize(a.getIgnoreChar()));
+    return text.append(")").toString();
+  }
+}
Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/verbalize/ActionVerbalizer.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: uima/sandbox/trunk/TextMarker/org.apache.uima.tm.textmarker.engine/src/main/java/org/apache/uima/tm/textmarker/verbalize/ActionVerbalizer.java
------------------------------------------------------------------------------
svn:mime-type = text/plain