You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ja...@apache.org on 2012/10/31 06:26:55 UTC
svn commit: r1403989 [11/28] - in /incubator/ctakes/branches/SHARPn-cTAKES:
Constituency Parser/src/org/chboston/cnlp/ctakes/parser/ Constituency
Parser/src/org/chboston/cnlp/ctakes/parser/uima/ae/ Constituency
Parser/src/org/chboston/cnlp/ctakes/parse...
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/WordToken.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/WordToken.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/WordToken.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/WordToken.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,22 +14,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.fsm.token;
-
-public interface WordToken extends TextToken {
- // constants
- public static final byte CAPS_UNKNOWN = 0;
- public static final byte CAPS_NONE = 1;
- public static final byte CAPS_MIXED = 2;
- public static final byte CAPS_FIRST_ONLY = 3;
- public static final byte CAPS_ALL = 4;
-
- public static final byte NUM_NONE = 0;
- public static final byte NUM_FIRST = 1;
- public static final byte NUM_MIDDLE = 2;
- public static final byte NUM_LAST = 3;
-
- public byte getCaps();
-
- public byte getNumPosition();
-}
+package edu.mayo.bmi.fsm.token;
+
+public interface WordToken extends TextToken {
+ // constants
+ public static final byte CAPS_UNKNOWN = 0;
+ public static final byte CAPS_NONE = 1;
+ public static final byte CAPS_MIXED = 2;
+ public static final byte CAPS_FIRST_ONLY = 3;
+ public static final byte CAPS_ALL = 4;
+
+ public static final byte NUM_NONE = 0;
+ public static final byte NUM_FIRST = 1;
+ public static final byte NUM_MIDDLE = 2;
+ public static final byte NUM_LAST = 3;
+
+ public byte getCaps();
+
+ public byte getNumPosition();
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/BaseTokenAdapter.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/BaseTokenAdapter.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/BaseTokenAdapter.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/BaseTokenAdapter.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,23 +14,23 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.fsm.token.adapter;
-
-import edu.mayo.bmi.fsm.token.BaseToken;
-import edu.mayo.bmi.nlp.tokenizer.Token;
-
-public class BaseTokenAdapter implements BaseToken {
- private Token iv_tok;
-
- public BaseTokenAdapter(Token tok) {
- iv_tok = tok;
- }
-
- public int getStartOffset() {
- return iv_tok.getStartOffset();
- }
-
- public int getEndOffset() {
- return iv_tok.getEndOffset();
- }
-}
+package edu.mayo.bmi.fsm.token.adapter;
+
+import edu.mayo.bmi.fsm.token.BaseToken;
+import edu.mayo.bmi.nlp.tokenizer.Token;
+
+public class BaseTokenAdapter implements BaseToken {
+ private Token iv_tok;
+
+ public BaseTokenAdapter(Token tok) {
+ iv_tok = tok;
+ }
+
+ public int getStartOffset() {
+ return iv_tok.getStartOffset();
+ }
+
+ public int getEndOffset() {
+ return iv_tok.getEndOffset();
+ }
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/CharacterTokenAdapter.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/CharacterTokenAdapter.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/CharacterTokenAdapter.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/CharacterTokenAdapter.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,22 +14,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.fsm.token.adapter;
-
-import edu.mayo.bmi.fsm.token.CharacterToken;
-import edu.mayo.bmi.nlp.tokenizer.Token;
-
-public class CharacterTokenAdapter extends BaseTokenAdapter implements
- CharacterToken {
- private char iv_char;
-
- public CharacterTokenAdapter(Token tok) {
- super(tok);
- iv_char = tok.getText().charAt(0);
- }
-
- public char getChar() {
- return iv_char;
- }
-
-}
+package edu.mayo.bmi.fsm.token.adapter;
+
+import edu.mayo.bmi.fsm.token.CharacterToken;
+import edu.mayo.bmi.nlp.tokenizer.Token;
+
+public class CharacterTokenAdapter extends BaseTokenAdapter implements
+ CharacterToken {
+ private char iv_char;
+
+ public CharacterTokenAdapter(Token tok) {
+ super(tok);
+ iv_char = tok.getText().charAt(0);
+ }
+
+ public char getChar() {
+ return iv_char;
+ }
+
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/DecimalTokenAdapter.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/DecimalTokenAdapter.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/DecimalTokenAdapter.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/DecimalTokenAdapter.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,23 +14,23 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.fsm.token.adapter;
-
-import edu.mayo.bmi.fsm.token.DecimalToken;
-import edu.mayo.bmi.nlp.tokenizer.Token;
-
-public class DecimalTokenAdapter extends NumberTokenAdapter implements
- DecimalToken {
- private double iv_val;
-
- public DecimalTokenAdapter(Token tok) {
- super(tok);
-
- iv_val = Double.parseDouble(removeCommas(tok.getText()));
- }
-
- public double getValue() {
- return iv_val;
- }
-
-}
+package edu.mayo.bmi.fsm.token.adapter;
+
+import edu.mayo.bmi.fsm.token.DecimalToken;
+import edu.mayo.bmi.nlp.tokenizer.Token;
+
+public class DecimalTokenAdapter extends NumberTokenAdapter implements
+ DecimalToken {
+ private double iv_val;
+
+ public DecimalTokenAdapter(Token tok) {
+ super(tok);
+
+ iv_val = Double.parseDouble(removeCommas(tok.getText()));
+ }
+
+ public double getValue() {
+ return iv_val;
+ }
+
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/IntegerTokenAdapter.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/IntegerTokenAdapter.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/IntegerTokenAdapter.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/IntegerTokenAdapter.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/NumberTokenAdapter.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/NumberTokenAdapter.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/NumberTokenAdapter.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/NumberTokenAdapter.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,34 +14,34 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.fsm.token.adapter;
-
-import edu.mayo.bmi.fsm.token.NumberToken;
-import edu.mayo.bmi.nlp.tokenizer.Token;
-
-public class NumberTokenAdapter extends BaseTokenAdapter implements NumberToken {
- private boolean iv_isPositive = true;
-
- public NumberTokenAdapter(Token tok) {
- super(tok);
- if (tok.getText().charAt(0) == '-') {
- iv_isPositive = false;
- }
- }
-
- public boolean getPositive() {
- return iv_isPositive;
- }
-
- protected String removeCommas(String str) {
- StringBuffer sb = new StringBuffer(str);
- for (int i = 0; i < sb.length(); i++) {
- char currentChar = sb.charAt(i);
- if (currentChar == ',') {
- sb.deleteCharAt(i);
- i--;
- }
- }
- return sb.toString();
- }
-}
+package edu.mayo.bmi.fsm.token.adapter;
+
+import edu.mayo.bmi.fsm.token.NumberToken;
+import edu.mayo.bmi.nlp.tokenizer.Token;
+
+public class NumberTokenAdapter extends BaseTokenAdapter implements NumberToken {
+ private boolean iv_isPositive = true;
+
+ public NumberTokenAdapter(Token tok) {
+ super(tok);
+ if (tok.getText().charAt(0) == '-') {
+ iv_isPositive = false;
+ }
+ }
+
+ public boolean getPositive() {
+ return iv_isPositive;
+ }
+
+ protected String removeCommas(String str) {
+ StringBuffer sb = new StringBuffer(str);
+ for (int i = 0; i < sb.length(); i++) {
+ char currentChar = sb.charAt(i);
+ if (currentChar == ',') {
+ sb.deleteCharAt(i);
+ i--;
+ }
+ }
+ return sb.toString();
+ }
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/PunctuationTokenAdapter.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/PunctuationTokenAdapter.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/PunctuationTokenAdapter.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/PunctuationTokenAdapter.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,21 +14,21 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.fsm.token.adapter;
-
-import edu.mayo.bmi.fsm.token.PunctuationToken;
-import edu.mayo.bmi.nlp.tokenizer.Token;
-
-/**
- * Adapts a Context Free Tokenizer token into a Context Dependent Tokenizer
- * PunctuationToken.
- *
- * @author Mayo Clinic
- *
- */
-public class PunctuationTokenAdapter extends CharacterTokenAdapter implements
- PunctuationToken {
- public PunctuationTokenAdapter(Token tok) {
- super(tok);
- }
-}
+package edu.mayo.bmi.fsm.token.adapter;
+
+import edu.mayo.bmi.fsm.token.PunctuationToken;
+import edu.mayo.bmi.nlp.tokenizer.Token;
+
+/**
+ * Adapts a Context Free Tokenizer token into a Context Dependent Tokenizer
+ * PunctuationToken.
+ *
+ * @author Mayo Clinic
+ *
+ */
+public class PunctuationTokenAdapter extends CharacterTokenAdapter implements
+ PunctuationToken {
+ public PunctuationTokenAdapter(Token tok) {
+ super(tok);
+ }
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/TokenConverter.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/TokenConverter.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/TokenConverter.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/TokenConverter.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,51 +14,51 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.fsm.token.adapter;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import edu.mayo.bmi.fsm.token.BaseToken;
-import edu.mayo.bmi.fsm.token.DecimalToken;
-import edu.mayo.bmi.fsm.token.IntegerToken;
-import edu.mayo.bmi.fsm.token.PunctuationToken;
-import edu.mayo.bmi.fsm.token.WordToken;
-import edu.mayo.bmi.nlp.tokenizer.Token;
-
-public class TokenConverter {
-
- public static List<BaseToken> convertTokens(List<Token> tokens) {
- List<BaseToken> baseTokens = new ArrayList<BaseToken>();
-
- for (int i = 0; i < tokens.size(); i++) {
- Token t = (Token) tokens.get(i);
- switch (t.getType()) {
- case Token.TYPE_WORD:
- WordToken wt = new WordTokenAdapter(t);
- baseTokens.add(wt);
- break;
- case Token.TYPE_PUNCT:
- PunctuationToken pt = new PunctuationTokenAdapter(t);
- baseTokens.add(pt);
- break;
- case Token.TYPE_NUMBER:
- if (t.isInteger()) {
- IntegerToken it = new IntegerTokenAdapter(t);
- baseTokens.add(it);
- } else {
- DecimalToken dt = new DecimalTokenAdapter(t);
- baseTokens.add(dt);
- }
- break;
- case Token.TYPE_EOL:
- break;
- case Token.TYPE_CONTRACTION:
- break;
- }
- }
-
- return baseTokens;
- }
-
-}
+package edu.mayo.bmi.fsm.token.adapter;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import edu.mayo.bmi.fsm.token.BaseToken;
+import edu.mayo.bmi.fsm.token.DecimalToken;
+import edu.mayo.bmi.fsm.token.IntegerToken;
+import edu.mayo.bmi.fsm.token.PunctuationToken;
+import edu.mayo.bmi.fsm.token.WordToken;
+import edu.mayo.bmi.nlp.tokenizer.Token;
+
+public class TokenConverter {
+
+ public static List<BaseToken> convertTokens(List<Token> tokens) {
+ List<BaseToken> baseTokens = new ArrayList<BaseToken>();
+
+ for (int i = 0; i < tokens.size(); i++) {
+ Token t = (Token) tokens.get(i);
+ switch (t.getType()) {
+ case Token.TYPE_WORD:
+ WordToken wt = new WordTokenAdapter(t);
+ baseTokens.add(wt);
+ break;
+ case Token.TYPE_PUNCT:
+ PunctuationToken pt = new PunctuationTokenAdapter(t);
+ baseTokens.add(pt);
+ break;
+ case Token.TYPE_NUMBER:
+ if (t.isInteger()) {
+ IntegerToken it = new IntegerTokenAdapter(t);
+ baseTokens.add(it);
+ } else {
+ DecimalToken dt = new DecimalTokenAdapter(t);
+ baseTokens.add(dt);
+ }
+ break;
+ case Token.TYPE_EOL:
+ break;
+ case Token.TYPE_CONTRACTION:
+ break;
+ }
+ }
+
+ return baseTokens;
+ }
+
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/WordTokenAdapter.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/WordTokenAdapter.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/WordTokenAdapter.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/fsm/token/adapter/WordTokenAdapter.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,57 +14,57 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.fsm.token.adapter;
-
-import edu.mayo.bmi.fsm.token.WordToken;
-import edu.mayo.bmi.nlp.tokenizer.Token;
-
-/**
- * Adapts a Context Free Tokenizer token into a Context Dependent Tokenizer
- * WordToken.
- *
- * @author Mayo Clinic
- *
- */
-public class WordTokenAdapter extends BaseTokenAdapter implements WordToken {
- private Token iv_tok;
-
- public WordTokenAdapter(Token tok) {
- super(tok);
- iv_tok = tok;
- }
-
- public String getText() {
- return iv_tok.getText();
- }
-
- public byte getCaps() {
- byte caps = iv_tok.getCaps();
- switch (caps) {
- case Token.CAPS_ALL:
- return WordToken.CAPS_ALL;
- case Token.CAPS_FIRST_ONLY:
- return WordToken.CAPS_FIRST_ONLY;
- case Token.CAPS_MIXED:
- return WordToken.CAPS_MIXED;
- case Token.CAPS_NONE:
- return WordToken.CAPS_NONE;
- default:
- return WordToken.CAPS_UNKNOWN;
- }
- }
-
- public byte getNumPosition() {
- byte numPos = iv_tok.getNumPosition();
- switch (numPos) {
- case Token.NUM_FIRST:
- return WordToken.NUM_FIRST;
- case Token.NUM_LAST:
- return WordToken.NUM_LAST;
- case Token.NUM_MIDDLE:
- return WordToken.NUM_MIDDLE;
- default:
- return WordToken.NUM_NONE;
- }
- }
-}
+package edu.mayo.bmi.fsm.token.adapter;
+
+import edu.mayo.bmi.fsm.token.WordToken;
+import edu.mayo.bmi.nlp.tokenizer.Token;
+
+/**
+ * Adapts a Context Free Tokenizer token into a Context Dependent Tokenizer
+ * WordToken.
+ *
+ * @author Mayo Clinic
+ *
+ */
+public class WordTokenAdapter extends BaseTokenAdapter implements WordToken {
+ private Token iv_tok;
+
+ public WordTokenAdapter(Token tok) {
+ super(tok);
+ iv_tok = tok;
+ }
+
+ public String getText() {
+ return iv_tok.getText();
+ }
+
+ public byte getCaps() {
+ byte caps = iv_tok.getCaps();
+ switch (caps) {
+ case Token.CAPS_ALL:
+ return WordToken.CAPS_ALL;
+ case Token.CAPS_FIRST_ONLY:
+ return WordToken.CAPS_FIRST_ONLY;
+ case Token.CAPS_MIXED:
+ return WordToken.CAPS_MIXED;
+ case Token.CAPS_NONE:
+ return WordToken.CAPS_NONE;
+ default:
+ return WordToken.CAPS_UNKNOWN;
+ }
+ }
+
+ public byte getNumPosition() {
+ byte numPos = iv_tok.getNumPosition();
+ switch (numPos) {
+ case Token.NUM_FIRST:
+ return WordToken.NUM_FIRST;
+ case Token.NUM_LAST:
+ return WordToken.NUM_LAST;
+ case Token.NUM_MIDDLE:
+ return WordToken.NUM_MIDDLE;
+ default:
+ return WordToken.NUM_NONE;
+ }
+ }
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/ContractionResult.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/ContractionResult.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/ContractionResult.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/ContractionResult.java Wed Oct 31 05:26:43 2012
@@ -1,46 +1,39 @@
-/*
- * Copyright: (c) 2011 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.mayo.bmi.nlp.tokenizer;
-
-public class ContractionResult {
-
- private int wordTokenLen;
- private int contractionTokenLen;
-
- public void setContractionTokenLen(int contractionTokenLen) {
- this.contractionTokenLen = contractionTokenLen;
- }
- public int getContractionTokenLen() {
- return contractionTokenLen;
- }
-
- public void setWordTokenLen(int wordTokenLen) {
- this.wordTokenLen = wordTokenLen;
- }
- public int getWordTokenLen() {
- return wordTokenLen;
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.mayo.bmi.nlp.tokenizer;
+
+public class ContractionResult {
+
+ private int wordTokenLen;
+ private int contractionTokenLen;
+
+ public void setContractionTokenLen(int contractionTokenLen) {
+ this.contractionTokenLen = contractionTokenLen;
+ }
+ public int getContractionTokenLen() {
+ return contractionTokenLen;
+ }
+
+ public void setWordTokenLen(int wordTokenLen) {
+ this.wordTokenLen = wordTokenLen;
+ }
+ public int getWordTokenLen() {
+ return wordTokenLen;
+ }
+
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/ContractionsPTB.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/ContractionsPTB.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/ContractionsPTB.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/ContractionsPTB.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2011 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/HyphenatedPTB.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/HyphenatedPTB.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/HyphenatedPTB.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/HyphenatedPTB.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2011 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/OffsetComparator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/OffsetComparator.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/OffsetComparator.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/OffsetComparator.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,33 +14,33 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.nlp.tokenizer;
-
-import java.util.Comparator;
-
-/**
- * Sorts Token objects by their offset.
- *
- * @author Mayo Clinic
- */
-public class OffsetComparator implements Comparator<Token> {
- public int compare(Token tok1, Token tok2) {
- if (tok1.getStartOffset() < tok2.getStartOffset()) {
- return -1;
- } else if (tok1.getStartOffset() > tok2.getStartOffset()) {
- return 1;
- } else {
- // equal start offsets, now look at end offsets
- if (tok1.getEndOffset() < tok2.getEndOffset()) {
- return -1;
- } else if (tok1.getEndOffset() > tok2.getEndOffset()) {
- return 1;
- } else {
- // all offsets are equal
- return 0;
- }
- }
-
- }
-
-}
+package edu.mayo.bmi.nlp.tokenizer;
+
+import java.util.Comparator;
+
+/**
+ * Sorts Token objects by their offset.
+ *
+ * @author Mayo Clinic
+ */
+public class OffsetComparator implements Comparator<Token> {
+ public int compare(Token tok1, Token tok2) {
+ if (tok1.getStartOffset() < tok2.getStartOffset()) {
+ return -1;
+ } else if (tok1.getStartOffset() > tok2.getStartOffset()) {
+ return 1;
+ } else {
+ // equal start offsets, now look at end offsets
+ if (tok1.getEndOffset() < tok2.getEndOffset()) {
+ return -1;
+ } else if (tok1.getEndOffset() > tok2.getEndOffset()) {
+ return 1;
+ } else {
+ // all offsets are equal
+ return 0;
+ }
+ }
+
+ }
+
+}
Modified: incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/Token.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/Token.java?rev=1403989&r1=1403988&r2=1403989&view=diff
==============================================================================
--- incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/Token.java (original)
+++ incubator/ctakes/branches/SHARPn-cTAKES/core/src/edu/mayo/bmi/nlp/tokenizer/Token.java Wed Oct 31 05:26:43 2012
@@ -1,18 +1,11 @@
/*
- * Copyright: (c) 2009 Mayo Foundation for Medical Education and
- * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
- * triple-shield Mayo logo are trademarks and service marks of MFMER.
- *
- * Except as contained in the copyright notice above, or as used to identify
- * MFMER as the author of this software, the trade names, trademarks, service
- * marks, or product names of the copyright holder shall not be used in
- * advertising, promotion or otherwise in connection with this software without
- * prior written authorization of the copyright holder.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
@@ -21,227 +14,227 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.mayo.bmi.nlp.tokenizer;
-
-/**
- * Object that represents a generic token. The token is related back to the
- * original text via a start and end offset. These are character positions
- * that relate directly to the original text.
- *
- * A token can be one of many different types. Please see the javadoc for the
- * TYPE fields to see a description of each.
- *
- * @author Mayo Clinic
- */
-public class Token
-{
- /**
- * The type is unknown.
- */
- public static final byte TYPE_UNKNOWN = 0;
-
- /**
- * A word token is defined as a consecutive series of word characters.
- * Word characters are defined as A-Z and a-z. A word token may contain
- * hypens if the hyphen has a word character on each side. A word token
- * may contain an apostrophe if the apostrophe has a word character on each
- * side.
- */
- public static final byte TYPE_WORD = 1;
-
- /**
- * A number token is defined as a consecutive series of digits.
- */
- public static final byte TYPE_NUMBER = 2;
-
- /**
- * A punctuation token is defined as one character that can be either a
- * period, double quote, single quote, question mark, exclamation point,
- * hyphen (if not surrounded by word characters), etc...
- */
- public static final byte TYPE_PUNCT = 3;
-
- /**
- * A EOL token is defined as a line feed or carriage return character.
- */
- public static final byte TYPE_EOL = 4;
-
- /**
- * Contains contractions and possessives (since they cannot be
- * differentiated without context).
- */
- public static final byte TYPE_CONTRACTION = 5;
-
- /**
- * Characters @!#$%^&*?
- */
- public static final byte TYPE_SYMBOL = 6;
-
- public static final byte CAPS_UNKNOWN = 0;
- public static final byte CAPS_NONE = 1;
- public static final byte CAPS_MIXED = 2;
- public static final byte CAPS_FIRST_ONLY = 3;
- public static final byte CAPS_ALL = 4;
-
- public static final byte NUM_NONE = 0;
- public static final byte NUM_FIRST = 1;
- public static final byte NUM_MIDDLE = 2;
- public static final byte NUM_LAST = 3;
-
- private byte iv_type = TYPE_UNKNOWN;
- private byte iv_caps = CAPS_UNKNOWN;
- private byte iv_numPosition = NUM_NONE;
- private int iv_startOffset = 0;
- private int iv_endOffset = 0;
- private String iv_text;
- private boolean iv_isInteger;
-
- /**
- * Constructor
- * @param startOffset The token's start offset.
- * @param endOffset The token's end offset.
- */
- public Token(int startOffset, int endOffset)
- {
- iv_startOffset = startOffset;
- iv_endOffset = endOffset;
- }
-
- /**
- * Gets the end offset. This is the position directly after the last letter.
- */
- public int getEndOffset()
- {
- return iv_endOffset;
- }
-
- /**
- * Sets the end offset. This is the position directly after the last letter.
- */
- public void setEndOffset(int i)
- {
- iv_endOffset = i;
- }
-
- /**
- * Gets the start offset. This is the position of the first letter.
- */
- public int getStartOffset()
- {
- return iv_startOffset;
- }
-
- /**
- * Sets the start offset. This is the position of the first letter.
- */
- public void setStartOffset(int i)
- {
- iv_startOffset = i;
- }
-
- /**
- * Gets the type of the token. Please see the javadoc for the TYPE fields.
- */
- public byte getType()
- {
- return iv_type;
- }
-
- /**
- * Sets the type of the token. Please see the javadoc for the TYPE fields.
- */
- public void setType(byte b)
- {
- iv_type = b;
- }
-
- /**
- * Gets the caps state of the token.
- */
- public byte getCaps()
- {
- return iv_caps;
- }
-
- /**
- * Sets the caps state of the token.
- */
- public void setCaps(byte b)
- {
- iv_caps = b;
- }
-
- /**
- * Gets the position of a number inside a Token.
- */
- public byte getNumPosition()
- {
- return iv_numPosition;
- }
-
- /**
- * Sets the position of a number inside a Token.
- */
- public void setNumPosition(byte b)
- {
- iv_numPosition = b;
- }
-
- /**
- * @return
- */
- public String getText()
- {
- return iv_text;
- }
-
- /**
- * @param i
- */
- public void setText(String s)
- {
- iv_text = s;
- }
-
- /**
- * @return
- */
- public boolean isInteger()
- {
- return iv_isInteger;
- }
-
- /**
- * @param b
- */
- public void setIsInteger(boolean isInteger)
- {
- iv_isInteger = isInteger;
- }
-
- public String toString()
- {
- return "\""+iv_text+"\" ("+iv_startOffset+","+iv_endOffset+") type="+typeDescription(iv_type);
- }
-
- public static String typeDescription(byte type)
- {
- if(type == TYPE_UNKNOWN)
- return "TYPE_UNKNOWN";
- else if(type == TYPE_WORD)
- return "TYPE_WORD";
- else if(type == TYPE_NUMBER)
- return "TYPE_NUMBER";
- else if(type == TYPE_PUNCT)
- return "TYPE_PUNCT";
- else if(type == TYPE_EOL)
- return "TYPE_EOL";
- else if(type == TYPE_CONTRACTION)
- return "TYPE_CONTRACTION";
- else if(type == TYPE_SYMBOL)
- return "TYPE_SYMBOL";
- return "not a valid type";
- }
-}
-
-
+package edu.mayo.bmi.nlp.tokenizer;
+
+/**
+ * Object that represents a generic token. The token is related back to the
+ * original text via a start and end offset. These are character positions
+ * that relate directly to the original text.
+ *
+ * A token can be one of many different types. Please see the javadoc for the
+ * TYPE fields to see a description of each.
+ *
+ * @author Mayo Clinic
+ */
+public class Token
+{
+ /**
+ * The type is unknown.
+ */
+ public static final byte TYPE_UNKNOWN = 0;
+
+ /**
+ * A word token is defined as a consecutive series of word characters.
+ * Word characters are defined as A-Z and a-z. A word token may contain
+ * hyphens if the hyphen has a word character on each side. A word token
+ * may contain an apostrophe if the apostrophe has a word character on each
+ * side.
+ */
+ public static final byte TYPE_WORD = 1;
+
+ /**
+ * A number token is defined as a consecutive series of digits.
+ */
+ public static final byte TYPE_NUMBER = 2;
+
+ /**
+ * A punctuation token is defined as one character that can be either a
+ * period, double quote, single quote, question mark, exclamation point,
+ * hyphen (if not surrounded by word characters), etc...
+ */
+ public static final byte TYPE_PUNCT = 3;
+
+ /**
+ * An EOL token is defined as a line feed or carriage return character.
+ */
+ public static final byte TYPE_EOL = 4;
+
+ /**
+ * Contains contractions and possessives (since they cannot be
+ * differentiated without context).
+ */
+ public static final byte TYPE_CONTRACTION = 5;
+
+ /**
+ * Characters @!#$%^&*?
+ */
+ public static final byte TYPE_SYMBOL = 6;
+
+ public static final byte CAPS_UNKNOWN = 0;
+ public static final byte CAPS_NONE = 1;
+ public static final byte CAPS_MIXED = 2;
+ public static final byte CAPS_FIRST_ONLY = 3;
+ public static final byte CAPS_ALL = 4;
+
+ public static final byte NUM_NONE = 0;
+ public static final byte NUM_FIRST = 1;
+ public static final byte NUM_MIDDLE = 2;
+ public static final byte NUM_LAST = 3;
+
+ private byte iv_type = TYPE_UNKNOWN;
+ private byte iv_caps = CAPS_UNKNOWN;
+ private byte iv_numPosition = NUM_NONE;
+ private int iv_startOffset = 0;
+ private int iv_endOffset = 0;
+ private String iv_text;
+ private boolean iv_isInteger;
+
+ /**
+ * Constructor
+ * @param startOffset The token's start offset.
+ * @param endOffset The token's end offset.
+ */
+ public Token(int startOffset, int endOffset)
+ {
+ iv_startOffset = startOffset;
+ iv_endOffset = endOffset;
+ }
+
+ /**
+ * Gets the end offset. This is the position directly after the last letter.
+ */
+ public int getEndOffset()
+ {
+ return iv_endOffset;
+ }
+
+ /**
+ * Sets the end offset. This is the position directly after the last letter.
+ */
+ public void setEndOffset(int i)
+ {
+ iv_endOffset = i;
+ }
+
+ /**
+ * Gets the start offset. This is the position of the first letter.
+ */
+ public int getStartOffset()
+ {
+ return iv_startOffset;
+ }
+
+ /**
+ * Sets the start offset. This is the position of the first letter.
+ */
+ public void setStartOffset(int i)
+ {
+ iv_startOffset = i;
+ }
+
+ /**
+ * Gets the type of the token. Please see the javadoc for the TYPE fields.
+ */
+ public byte getType()
+ {
+ return iv_type;
+ }
+
+ /**
+ * Sets the type of the token. Please see the javadoc for the TYPE fields.
+ */
+ public void setType(byte b)
+ {
+ iv_type = b;
+ }
+
+ /**
+ * Gets the caps state of the token.
+ */
+ public byte getCaps()
+ {
+ return iv_caps;
+ }
+
+ /**
+ * Sets the caps state of the token.
+ */
+ public void setCaps(byte b)
+ {
+ iv_caps = b;
+ }
+
+ /**
+ * Gets the position of a number inside a Token.
+ */
+ public byte getNumPosition()
+ {
+ return iv_numPosition;
+ }
+
+ /**
+ * Sets the position of a number inside a Token.
+ */
+ public void setNumPosition(byte b)
+ {
+ iv_numPosition = b;
+ }
+
+ /**
+ * @return
+ */
+ public String getText()
+ {
+ return iv_text;
+ }
+
+ /**
+ * @param s
+ */
+ public void setText(String s)
+ {
+ iv_text = s;
+ }
+
+ /**
+ * @return
+ */
+ public boolean isInteger()
+ {
+ return iv_isInteger;
+ }
+
+ /**
+ * @param isInteger
+ */
+ public void setIsInteger(boolean isInteger)
+ {
+ iv_isInteger = isInteger;
+ }
+
+ public String toString()
+ {
+ return "\""+iv_text+"\" ("+iv_startOffset+","+iv_endOffset+") type="+typeDescription(iv_type);
+ }
+
+ public static String typeDescription(byte type)
+ {
+ if(type == TYPE_UNKNOWN)
+ return "TYPE_UNKNOWN";
+ else if(type == TYPE_WORD)
+ return "TYPE_WORD";
+ else if(type == TYPE_NUMBER)
+ return "TYPE_NUMBER";
+ else if(type == TYPE_PUNCT)
+ return "TYPE_PUNCT";
+ else if(type == TYPE_EOL)
+ return "TYPE_EOL";
+ else if(type == TYPE_CONTRACTION)
+ return "TYPE_CONTRACTION";
+ else if(type == TYPE_SYMBOL)
+ return "TYPE_SYMBOL";
+ return "not a valid type";
+ }
+}
+
+