You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2012/09/17 17:55:18 UTC
svn commit: r1386675 [3/10] - in /lucene/dev/branches/branch_4x:
lucene/analysis/common/src/java/org/apache/lucene/analysis/br/
lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/
lucene/analysis/common/src/java/org/apache/lucene/ana...
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java?rev=1386675&r1=1386674&r2=1386675&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java Mon Sep 17 15:55:11 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 1:23 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/17/12 9:15 AM */
package org.apache.lucene.analysis.wikipedia;
@@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokena
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 8/6/12 1:23 PM from the specification file
- * <tt>/home/rmuir/workspace/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
+ * on 9/17/12 9:15 AM from the specification file
+ * <tt>/Users/Erick/apache/4x_4326/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
*/
class WikipediaTokenizerImpl {
@@ -37,16 +37,16 @@ class WikipediaTokenizerImpl {
private static final int ZZ_BUFFERSIZE = 4096;
/** lexical states */
- public static final int THREE_SINGLE_QUOTES_STATE = 10;
+ public static final int YYINITIAL = 0;
+ public static final int CATEGORY_STATE = 2;
+ public static final int INTERNAL_LINK_STATE = 4;
public static final int EXTERNAL_LINK_STATE = 6;
+ public static final int TWO_SINGLE_QUOTES_STATE = 8;
+ public static final int THREE_SINGLE_QUOTES_STATE = 10;
+ public static final int FIVE_SINGLE_QUOTES_STATE = 12;
public static final int DOUBLE_EQUALS_STATE = 14;
- public static final int INTERNAL_LINK_STATE = 4;
public static final int DOUBLE_BRACE_STATE = 16;
- public static final int CATEGORY_STATE = 2;
- public static final int YYINITIAL = 0;
public static final int STRING = 18;
- public static final int FIVE_SINGLE_QUOTES_STATE = 12;
- public static final int TWO_SINGLE_QUOTES_STATE = 8;
/**
* ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
@@ -589,7 +589,7 @@ final void reset() {
}
}
- // numRead < 0
+ // numRead < 0
return true;
}
@@ -810,188 +810,188 @@ final void reset() {
zzMarkedPos = zzMarkedPosL;
switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
- case 44:
- { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);/* Break so we don't hit fall-through warning: */ break;
+ case 1:
+ { numWikiTokensSeen = 0; positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
}
case 47: break;
- case 37:
- { currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
+ case 2:
+ { positionInc = 1; return ALPHANUM;
}
case 48: break;
- case 16:
- { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;
+ case 3:
+ { positionInc = 1; return CJ;
}
case 49: break;
- case 20:
- { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+ case 4:
+ { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
}
case 50: break;
- case 40:
- { positionInc = 1; return ACRONYM;
- }
- case 51: break;
case 5:
{ positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
}
+ case 51: break;
+ case 6:
+ { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;
+ }
case 52: break;
- case 36:
- { positionInc = 1; return COMPANY;
+ case 7:
+ { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;
}
case 53: break;
- case 10:
- { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
+ case 8:
+ { /* Break so we don't hit fall-through warning: */ break;/* ignore */
}
case 54: break;
- case 15:
- { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); /* Break so we don't hit fall-through warning: */ break;
+ case 9:
+ { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
}
case 55: break;
- case 22:
- { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}/* Break so we don't hit fall-through warning: */ break;
+ case 10:
+ { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
}
case 56: break;
- case 35:
- { positionInc = 1; return NUM;
+ case 11:
+ { currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 57: break;
- case 33:
- { positionInc = 1; return APOSTROPHE;
+ case 12:
+ { currentTokType = ITALICS; numWikiTokensSeen++; yybegin(STRING); return currentTokType;/*italics*/
}
case 58: break;
- case 21:
- { yybegin(STRING); return currentTokType;/*pipe*/
+ case 13:
+ { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 59: break;
- case 18:
- { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */
+ case 14:
+ { yybegin(STRING); numWikiTokensSeen++; return currentTokType;
}
case 60: break;
- case 2:
- { positionInc = 1; return ALPHANUM;
+ case 15:
+ { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); /* Break so we don't hit fall-through warning: */ break;
}
case 61: break;
- case 1:
- { numWikiTokensSeen = 0; positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
+ case 16:
+ { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;
}
case 62: break;
case 17:
{ yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;
}
case 63: break;
- case 39:
- { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end sub header*/
+ case 18:
+ { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */
}
case 64: break;
- case 29:
- { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+ case 19:
+ { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/
}
case 65: break;
- case 46:
- { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
+ case 20:
+ { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 66: break;
- case 27:
- { numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
+ case 21:
+ { yybegin(STRING); return currentTokType;/*pipe*/
}
case 67: break;
- case 4:
- { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
+ case 22:
+ { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}/* Break so we don't hit fall-through warning: */ break;
}
case 68: break;
- case 38:
- { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold*/
+ case 23:
+ { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);/* Break so we don't hit fall-through warning: */ break;
}
case 69: break;
- case 13:
- { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+ case 24:
+ { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
}
case 70: break;
- case 3:
- { positionInc = 1; return CJ;
+ case 25:
+ { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;
}
case 71: break;
- case 45:
- { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
+ case 26:
+ { yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;
}
case 72: break;
- case 6:
- { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;
+ case 27:
+ { numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
}
case 73: break;
- case 11:
- { currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
+ case 28:
+ { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 74: break;
- case 25:
- { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;
+ case 29:
+ { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 75: break;
- case 8:
- { /* Break so we don't hit fall-through warning: */ break;/* ignore */
+ case 30:
+ { yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
}
case 76: break;
- case 19:
- { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/
+ case 31:
+ { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end italics*/
}
case 77: break;
- case 43:
- { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
+ case 32:
+ { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 78: break;
- case 42:
- { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold italics*/
+ case 33:
+ { positionInc = 1; return APOSTROPHE;
}
case 79: break;
- case 30:
- { yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
+ case 34:
+ { positionInc = 1; return HOST;
}
case 80: break;
- case 14:
- { yybegin(STRING); numWikiTokensSeen++; return currentTokType;
+ case 35:
+ { positionInc = 1; return NUM;
}
case 81: break;
- case 9:
- { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
+ case 36:
+ { positionInc = 1; return COMPANY;
}
case 82: break;
- case 7:
- { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;
+ case 37:
+ { currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 83: break;
- case 41:
- { positionInc = 1; return EMAIL;
+ case 38:
+ { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold*/
}
case 84: break;
- case 28:
- { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+ case 39:
+ { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end sub header*/
}
case 85: break;
- case 23:
- { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);/* Break so we don't hit fall-through warning: */ break;
+ case 40:
+ { positionInc = 1; return ACRONYM;
}
case 86: break;
- case 34:
- { positionInc = 1; return HOST;
+ case 41:
+ { positionInc = 1; return EMAIL;
}
case 87: break;
- case 32:
- { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+ case 42:
+ { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold italics*/
}
case 88: break;
- case 12:
- { currentTokType = ITALICS; numWikiTokensSeen++; yybegin(STRING); return currentTokType;/*italics*/
+ case 43:
+ { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
}
case 89: break;
- case 24:
- { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
+ case 44:
+ { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);/* Break so we don't hit fall-through warning: */ break;
}
case 90: break;
- case 31:
- { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end italics*/
+ case 45:
+ { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 91: break;
- case 26:
- { yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;
+ case 46:
+ { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 92: break;
default:
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex?rev=1386675&r1=1386674&r2=1386675&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex Mon Sep 17 15:55:11 2012
@@ -136,7 +136,7 @@ NUM = ({ALPHANUM} {P} {HAS_DIGIT}
TAGS = "<"\/?{ALPHANUM}({WHITESPACE}*{ALPHANUM}=\"{ALPHANUM}\")*">"
// punctuation
-P = ("_"|"-"|"/"|"."|",")
+P = ("_"|"-"|"/"|"."|",")
// at least one digit
HAS_DIGIT =
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/tartarus/snowball/Among.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/tartarus/snowball/Among.java?rev=1386675&r1=1386674&r2=1386675&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/tartarus/snowball/Among.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/tartarus/snowball/Among.java Mon Sep 17 15:55:11 2012
@@ -43,25 +43,26 @@ import java.lang.reflect.Method;
* reflection calls (Lovins, etc) use EMPTY_ARGS/EMPTY_PARAMS
*/
public class Among {
- private static final Class<?>[] EMPTY_PARAMS = new Class[0];
- public Among (String s, int substring_i, int result,
- String methodname, SnowballProgram methodobject) {
- this.s_size = s.length();
- this.s = s.toCharArray();
- this.substring_i = substring_i;
- this.result = result;
- this.methodobject = methodobject;
- if (methodname.length() == 0) {
- this.method = null;
- } else {
- try {
- this.method = methodobject.getClass().
- getDeclaredMethod(methodname, EMPTY_PARAMS);
- } catch (NoSuchMethodException e) {
- throw new RuntimeException(e);
- }
- }
+ private static final Class<?>[] EMPTY_PARAMS = new Class[0];
+
+ public Among(String s, int substring_i, int result,
+ String methodname, SnowballProgram methodobject) {
+ this.s_size = s.length();
+ this.s = s.toCharArray();
+ this.substring_i = substring_i;
+ this.result = result;
+ this.methodobject = methodobject;
+ if (methodname.length() == 0) {
+ this.method = null;
+ } else {
+ try {
+ this.method = methodobject.getClass().
+ getDeclaredMethod(methodname, EMPTY_PARAMS);
+ } catch (NoSuchMethodException e) {
+ throw new RuntimeException(e);
+ }
}
+ }
public final int s_size; /* search string */
public final char[] s; /* search string */
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java?rev=1386675&r1=1386674&r2=1386675&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java Mon Sep 17 15:55:11 2012
@@ -51,8 +51,8 @@ public abstract class SnowballProgram {
protected SnowballProgram()
{
- current = new char[8];
- setCurrent("");
+ current = new char[8];
+ setCurrent("");
}
public abstract boolean stem();
@@ -62,12 +62,12 @@ public abstract class SnowballProgram {
*/
public void setCurrent(String value)
{
- current = value.toCharArray();
- cursor = 0;
- limit = value.length();
- limit_backward = 0;
- bra = cursor;
- ket = limit;
+ current = value.toCharArray();
+ cursor = 0;
+ limit = value.length();
+ limit_backward = 0;
+ bra = cursor;
+ ket = limit;
}
/**
@@ -130,354 +130,350 @@ public abstract class SnowballProgram {
protected void copy_from(SnowballProgram other)
{
- current = other.current;
- cursor = other.cursor;
- limit = other.limit;
- limit_backward = other.limit_backward;
- bra = other.bra;
- ket = other.ket;
+ current = other.current;
+ cursor = other.cursor;
+ limit = other.limit;
+ limit_backward = other.limit_backward;
+ bra = other.bra;
+ ket = other.ket;
}
protected boolean in_grouping(char [] s, int min, int max)
{
- if (cursor >= limit) return false;
- char ch = current[cursor];
- if (ch > max || ch < min) return false;
- ch -= min;
- if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
- cursor++;
- return true;
+ if (cursor >= limit) return false;
+ char ch = current[cursor];
+ if (ch > max || ch < min) return false;
+ ch -= min;
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
+ cursor++;
+ return true;
}
protected boolean in_grouping_b(char [] s, int min, int max)
{
- if (cursor <= limit_backward) return false;
- char ch = current[cursor - 1];
- if (ch > max || ch < min) return false;
- ch -= min;
- if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
- cursor--;
- return true;
+ if (cursor <= limit_backward) return false;
+ char ch = current[cursor - 1];
+ if (ch > max || ch < min) return false;
+ ch -= min;
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
+ cursor--;
+ return true;
}
protected boolean out_grouping(char [] s, int min, int max)
{
- if (cursor >= limit) return false;
- char ch = current[cursor];
- if (ch > max || ch < min) {
- cursor++;
- return true;
- }
- ch -= min;
- if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
- cursor ++;
- return true;
- }
- return false;
+ if (cursor >= limit) return false;
+ char ch = current[cursor];
+ if (ch > max || ch < min) {
+ cursor++;
+ return true;
+ }
+ ch -= min;
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
+ cursor ++;
+ return true;
+ }
+ return false;
}
protected boolean out_grouping_b(char [] s, int min, int max)
{
- if (cursor <= limit_backward) return false;
- char ch = current[cursor - 1];
- if (ch > max || ch < min) {
- cursor--;
- return true;
- }
- ch -= min;
- if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
- cursor--;
- return true;
- }
- return false;
+ if (cursor <= limit_backward) return false;
+ char ch = current[cursor - 1];
+ if (ch > max || ch < min) {
+ cursor--;
+ return true;
+ }
+ ch -= min;
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
+ cursor--;
+ return true;
+ }
+ return false;
}
protected boolean in_range(int min, int max)
{
- if (cursor >= limit) return false;
- char ch = current[cursor];
- if (ch > max || ch < min) return false;
- cursor++;
- return true;
+ if (cursor >= limit) return false;
+ char ch = current[cursor];
+ if (ch > max || ch < min) return false;
+ cursor++;
+ return true;
}
protected boolean in_range_b(int min, int max)
{
- if (cursor <= limit_backward) return false;
- char ch = current[cursor - 1];
- if (ch > max || ch < min) return false;
- cursor--;
- return true;
+ if (cursor <= limit_backward) return false;
+ char ch = current[cursor - 1];
+ if (ch > max || ch < min) return false;
+ cursor--;
+ return true;
}
protected boolean out_range(int min, int max)
{
- if (cursor >= limit) return false;
- char ch = current[cursor];
- if (!(ch > max || ch < min)) return false;
- cursor++;
- return true;
+ if (cursor >= limit) return false;
+ char ch = current[cursor];
+ if (!(ch > max || ch < min)) return false;
+ cursor++;
+ return true;
}
protected boolean out_range_b(int min, int max)
{
- if (cursor <= limit_backward) return false;
- char ch = current[cursor - 1];
- if(!(ch > max || ch < min)) return false;
- cursor--;
- return true;
+ if (cursor <= limit_backward) return false;
+ char ch = current[cursor - 1];
+ if(!(ch > max || ch < min)) return false;
+ cursor--;
+ return true;
}
protected boolean eq_s(int s_size, CharSequence s)
{
- if (limit - cursor < s_size) return false;
- int i;
- for (i = 0; i != s_size; i++) {
- if (current[cursor + i] != s.charAt(i)) return false;
- }
- cursor += s_size;
- return true;
+ if (limit - cursor < s_size) return false;
+ int i;
+ for (i = 0; i != s_size; i++) {
+ if (current[cursor + i] != s.charAt(i)) return false;
+ }
+ cursor += s_size;
+ return true;
}
protected boolean eq_s_b(int s_size, CharSequence s)
{
- if (cursor - limit_backward < s_size) return false;
- int i;
- for (i = 0; i != s_size; i++) {
- if (current[cursor - s_size + i] != s.charAt(i)) return false;
- }
- cursor -= s_size;
- return true;
+ if (cursor - limit_backward < s_size) return false;
+ int i;
+ for (i = 0; i != s_size; i++) {
+ if (current[cursor - s_size + i] != s.charAt(i)) return false;
+ }
+ cursor -= s_size;
+ return true;
}
protected boolean eq_v(CharSequence s)
{
- return eq_s(s.length(), s);
+ return eq_s(s.length(), s);
}
protected boolean eq_v_b(CharSequence s)
- { return eq_s_b(s.length(), s);
+ {
+ return eq_s_b(s.length(), s);
}
protected int find_among(Among v[], int v_size)
{
- int i = 0;
- int j = v_size;
+ int i = 0;
+ int j = v_size;
+
+ int c = cursor;
+ int l = limit;
- int c = cursor;
- int l = limit;
+ int common_i = 0;
+ int common_j = 0;
- int common_i = 0;
- int common_j = 0;
+ boolean first_key_inspected = false;
+
+ while (true) {
+ int k = i + ((j - i) >> 1);
+ int diff = 0;
+ int common = common_i < common_j ? common_i : common_j; // smaller
+ Among w = v[k];
+ int i2;
+ for (i2 = common; i2 < w.s_size; i2++) {
+ if (c + common == l) {
+ diff = -1;
+ break;
+ }
+ diff = current[c + common] - w.s[i2];
+ if (diff != 0) break;
+ common++;
+ }
+ if (diff < 0) {
+ j = k;
+ common_j = common;
+ } else {
+ i = k;
+ common_i = common;
+ }
+ if (j - i <= 1) {
+ if (i > 0) break; // v->s has been inspected
+ if (j == i) break; // only one item in v
+
+ // - but now we need to go round once more to get
+ // v->s inspected. This looks messy, but is actually
+ // the optimal approach.
- boolean first_key_inspected = false;
-
- while(true) {
- int k = i + ((j - i) >> 1);
- int diff = 0;
- int common = common_i < common_j ? common_i : common_j; // smaller
- Among w = v[k];
- int i2;
- for (i2 = common; i2 < w.s_size; i2++) {
- if (c + common == l) {
- diff = -1;
- break;
- }
- diff = current[c + common] - w.s[i2];
- if (diff != 0) break;
- common++;
- }
- if (diff < 0) {
- j = k;
- common_j = common;
- } else {
- i = k;
- common_i = common;
- }
- if (j - i <= 1) {
- if (i > 0) break; // v->s has been inspected
- if (j == i) break; // only one item in v
-
- // - but now we need to go round once more to get
- // v->s inspected. This looks messy, but is actually
- // the optimal approach.
-
- if (first_key_inspected) break;
- first_key_inspected = true;
- }
- }
- while(true) {
- Among w = v[i];
- if (common_i >= w.s_size) {
- cursor = c + w.s_size;
- if (w.method == null) return w.result;
- boolean res;
- try {
- Object resobj = w.method.invoke(w.methodobject, EMPTY_ARGS);
- res = resobj.toString().equals("true");
- } catch (InvocationTargetException e) {
- res = false;
- // FIXME - debug message
- } catch (IllegalAccessException e) {
- res = false;
- // FIXME - debug message
- }
- cursor = c + w.s_size;
- if (res) return w.result;
- }
- i = w.substring_i;
- if (i < 0) return 0;
- }
+ if (first_key_inspected) break;
+ first_key_inspected = true;
+ }
+ }
+ while (true) {
+ Among w = v[i];
+ if (common_i >= w.s_size) {
+ cursor = c + w.s_size;
+ if (w.method == null) return w.result;
+ boolean res;
+ try {
+ Object resobj = w.method.invoke(w.methodobject, EMPTY_ARGS);
+ res = resobj.toString().equals("true");
+ } catch (InvocationTargetException e) {
+ res = false;
+ // FIXME - debug message
+ } catch (IllegalAccessException e) {
+ res = false;
+ // FIXME - debug message
+ }
+ cursor = c + w.s_size;
+ if (res) return w.result;
+ }
+ i = w.substring_i;
+ if (i < 0) return 0;
+ }
}
- // find_among_b is for backwards processing. Same comments apply
+ // find_among_b is for backwards processing. Same comments apply
protected int find_among_b(Among v[], int v_size)
{
- int i = 0;
- int j = v_size;
+ int i = 0;
+ int j = v_size;
- int c = cursor;
- int lb = limit_backward;
+ int c = cursor;
+ int lb = limit_backward;
- int common_i = 0;
- int common_j = 0;
+ int common_i = 0;
+ int common_j = 0;
- boolean first_key_inspected = false;
-
- while(true) {
- int k = i + ((j - i) >> 1);
- int diff = 0;
- int common = common_i < common_j ? common_i : common_j;
- Among w = v[k];
- int i2;
- for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) {
- if (c - common == lb) {
- diff = -1;
- break;
- }
- diff = current[c - 1 - common] - w.s[i2];
- if (diff != 0) break;
- common++;
- }
- if (diff < 0) {
- j = k;
- common_j = common;
- } else {
- i = k;
- common_i = common;
- }
- if (j - i <= 1) {
- if (i > 0) break;
- if (j == i) break;
- if (first_key_inspected) break;
- first_key_inspected = true;
- }
- }
- while(true) {
- Among w = v[i];
- if (common_i >= w.s_size) {
- cursor = c - w.s_size;
- if (w.method == null) return w.result;
-
- boolean res;
- try {
- Object resobj = w.method.invoke(w.methodobject, EMPTY_ARGS);
- res = resobj.toString().equals("true");
- } catch (InvocationTargetException e) {
- res = false;
- // FIXME - debug message
- } catch (IllegalAccessException e) {
- res = false;
- // FIXME - debug message
- }
- cursor = c - w.s_size;
- if (res) return w.result;
- }
- i = w.substring_i;
- if (i < 0) return 0;
- }
+ boolean first_key_inspected = false;
+
+ while (true) {
+ int k = i + ((j - i) >> 1);
+ int diff = 0;
+ int common = common_i < common_j ? common_i : common_j;
+ Among w = v[k];
+ int i2;
+ for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) {
+ if (c - common == lb) {
+ diff = -1;
+ break;
+ }
+ diff = current[c - 1 - common] - w.s[i2];
+ if (diff != 0) break;
+ common++;
+ }
+ if (diff < 0) {
+ j = k;
+ common_j = common;
+ } else {
+ i = k;
+ common_i = common;
+ }
+ if (j - i <= 1) {
+ if (i > 0) break;
+ if (j == i) break;
+ if (first_key_inspected) break;
+ first_key_inspected = true;
+ }
+ }
+ while (true) {
+ Among w = v[i];
+ if (common_i >= w.s_size) {
+ cursor = c - w.s_size;
+ if (w.method == null) return w.result;
+
+ boolean res;
+ try {
+ Object resobj = w.method.invoke(w.methodobject, EMPTY_ARGS);
+ res = resobj.toString().equals("true");
+ } catch (InvocationTargetException e) {
+ res = false;
+ // FIXME - debug message
+ } catch (IllegalAccessException e) {
+ res = false;
+ // FIXME - debug message
+ }
+ cursor = c - w.s_size;
+ if (res) return w.result;
+ }
+ i = w.substring_i;
+ if (i < 0) return 0;
+ }
}
- /* to replace chars between c_bra and c_ket in current by the
+ /* to replace chars between c_bra and c_ket in current by the
* chars in s.
*/
- protected int replace_s(int c_bra, int c_ket, CharSequence s)
- {
- final int adjustment = s.length() - (c_ket - c_bra);
- final int newLength = limit + adjustment;
- //resize if necessary
- if (newLength > current.length) {
- char newBuffer[] = new char[ArrayUtil.oversize(newLength, RamUsageEstimator.NUM_BYTES_CHAR)];
- System.arraycopy(current, 0, newBuffer, 0, limit);
- current = newBuffer;
- }
- // if the substring being replaced is longer or shorter than the
- // replacement, need to shift things around
- if (adjustment != 0 && c_ket < limit) {
- System.arraycopy(current, c_ket, current, c_bra + s.length(),
- limit - c_ket);
- }
- // insert the replacement text
- // Note, faster is s.getChars(0, s.length(), current, c_bra);
- // but would have to duplicate this method for both String and StringBuilder
- for (int i = 0; i < s.length(); i++)
- current[c_bra + i] = s.charAt(i);
-
- limit += adjustment;
- if (cursor >= c_ket) cursor += adjustment;
- else if (cursor > c_bra) cursor = c_bra;
- return adjustment;
- }
-
- protected void slice_check()
- {
- if (bra < 0 ||
- bra > ket ||
- ket > limit)
- {
- throw new IllegalArgumentException("faulty slice operation: bra=" + bra + ",ket=" + ket + ",limit=" + limit);
- // FIXME: report error somehow.
- /*
- fprintf(stderr, "faulty slice operation:\n");
- debug(z, -1, 0);
- exit(1);
- */
- }
- }
-
- protected void slice_from(CharSequence s)
- {
- slice_check();
- replace_s(bra, ket, s);
- }
-
- protected void slice_del()
- {
- slice_from((CharSequence)"");
- }
-
- protected void insert(int c_bra, int c_ket, CharSequence s)
- {
- int adjustment = replace_s(c_bra, c_ket, s);
- if (c_bra <= bra) bra += adjustment;
- if (c_bra <= ket) ket += adjustment;
+ protected int replace_s(int c_bra, int c_ket, CharSequence s) {
+ final int adjustment = s.length() - (c_ket - c_bra);
+ final int newLength = limit + adjustment;
+ //resize if necessary
+ if (newLength > current.length) {
+ char newBuffer[] = new char[ArrayUtil.oversize(newLength, RamUsageEstimator.NUM_BYTES_CHAR)];
+ System.arraycopy(current, 0, newBuffer, 0, limit);
+ current = newBuffer;
+ }
+ // if the substring being replaced is longer or shorter than the
+ // replacement, need to shift things around
+ if (adjustment != 0 && c_ket < limit) {
+ System.arraycopy(current, c_ket, current, c_bra + s.length(),
+ limit - c_ket);
+ }
+ // insert the replacement text
+ // Note, faster is s.getChars(0, s.length(), current, c_bra);
+ // but would have to duplicate this method for both String and StringBuilder
+ for (int i = 0; i < s.length(); i++)
+ current[c_bra + i] = s.charAt(i);
+
+ limit += adjustment;
+ if (cursor >= c_ket) cursor += adjustment;
+ else if (cursor > c_bra) cursor = c_bra;
+ return adjustment;
+ }
+
+ protected void slice_check() {
+ if (bra < 0 ||
+ bra > ket ||
+ ket > limit) {
+ throw new IllegalArgumentException("faulty slice operation: bra=" + bra + ",ket=" + ket + ",limit=" + limit);
+ // FIXME: report error somehow.
+ /*
+ fprintf(stderr, "faulty slice operation:\n");
+ debug(z, -1, 0);
+ exit(1);
+ */
+ }
+ }
+
+ protected void slice_from(CharSequence s) {
+ slice_check();
+ replace_s(bra, ket, s);
+ }
+
+ protected void slice_del() {
+ slice_from((CharSequence) "");
+ }
+
+ protected void insert(int c_bra, int c_ket, CharSequence s)
+ {
+ int adjustment = replace_s(c_bra, c_ket, s);
+ if (c_bra <= bra) bra += adjustment;
+ if (c_bra <= ket) ket += adjustment;
}
/* Copy the slice into the supplied StringBuffer */
protected StringBuilder slice_to(StringBuilder s)
{
- slice_check();
- int len = ket - bra;
- s.setLength(0);
- s.append(current, bra, len);
- return s;
+ slice_check();
+ int len = ket - bra;
+ s.setLength(0);
+ s.append(current, bra, len);
+ return s;
}
protected StringBuilder assign_to(StringBuilder s)
{
- s.setLength(0);
- s.append(current, 0, limit);
- return s;
+ s.setLength(0);
+ s.append(current, 0, limit);
+ return s;
}
/*
Modified: lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java?rev=1386675&r1=1386674&r2=1386675&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java Mon Sep 17 15:55:11 2012
@@ -38,87 +38,87 @@ import org.apache.lucene.analysis.util.C
public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
public void testWithSnowballExamples() throws Exception {
- check("boa", "boa");
- check("boainain", "boainain");
- check("boas", "boas");
- check("bôas", "boas"); // removes diacritic: different from snowball portuguese
- check("boassu", "boassu");
- check("boataria", "boat");
- check("boate", "boat");
- check("boates", "boat");
- check("boatos", "boat");
- check("bob", "bob");
- check("boba", "bob");
- check("bobagem", "bobag");
- check("bobagens", "bobagens");
- check("bobalhões", "bobalho"); // removes diacritic: different from snowball portuguese
- check("bobear", "bob");
- check("bobeira", "bobeir");
- check("bobinho", "bobinh");
- check("bobinhos", "bobinh");
- check("bobo", "bob");
- check("bobs", "bobs");
- check("boca", "boc");
- check("bocadas", "boc");
- check("bocadinho", "bocadinh");
- check("bocado", "boc");
- check("bocaiúva", "bocaiuv"); // removes diacritic: different from snowball portuguese
- check("boçal", "bocal"); // removes diacritic: different from snowball portuguese
- check("bocarra", "bocarr");
- check("bocas", "boc");
- check("bode", "bod");
- check("bodoque", "bodoqu");
- check("body", "body");
- check("boeing", "boeing");
- check("boem", "boem");
- check("boemia", "boem");
- check("boêmio", "boemi"); // removes diacritic: different from snowball portuguese
- check("bogotá", "bogot");
- check("boi", "boi");
- check("bóia", "boi"); // removes diacritic: different from snowball portuguese
- check("boiando", "boi");
- check("quiabo", "quiab");
- check("quicaram", "quic");
- check("quickly", "quickly");
- check("quieto", "quiet");
- check("quietos", "quiet");
- check("quilate", "quilat");
- check("quilates", "quilat");
- check("quilinhos", "quilinh");
- check("quilo", "quil");
- check("quilombo", "quilomb");
- check("quilométricas", "quilometr"); // removes diacritic: different from snowball portuguese
- check("quilométricos", "quilometr"); // removes diacritic: different from snowball portuguese
- check("quilômetro", "quilometr"); // removes diacritic: different from snowball portuguese
- check("quilômetros", "quilometr"); // removes diacritic: different from snowball portuguese
- check("quilos", "quil");
- check("quimica", "quimic");
- check("quilos", "quil");
- check("quimica", "quimic");
- check("quimicas", "quimic");
- check("quimico", "quimic");
- check("quimicos", "quimic");
- check("quimioterapia", "quimioterap");
- check("quimioterápicos", "quimioterap"); // removes diacritic: different from snowball portuguese
- check("quimono", "quimon");
- check("quincas", "quinc");
- check("quinhão", "quinha"); // removes diacritic: different from snowball portuguese
- check("quinhentos", "quinhent");
- check("quinn", "quinn");
- check("quino", "quin");
- check("quinta", "quint");
- check("quintal", "quintal");
- check("quintana", "quintan");
- check("quintanilha", "quintanilh");
- check("quintão", "quinta"); // removes diacritic: different from snowball portuguese
- check("quintessência", "quintessente"); // versus snowball portuguese 'quintessent'
- check("quintino", "quintin");
- check("quinto", "quint");
- check("quintos", "quint");
- check("quintuplicou", "quintuplic");
- check("quinze", "quinz");
- check("quinzena", "quinzen");
- check("quiosque", "quiosqu");
+ check("boa", "boa");
+ check("boainain", "boainain");
+ check("boas", "boas");
+ check("bôas", "boas"); // removes diacritic: different from snowball portuguese
+ check("boassu", "boassu");
+ check("boataria", "boat");
+ check("boate", "boat");
+ check("boates", "boat");
+ check("boatos", "boat");
+ check("bob", "bob");
+ check("boba", "bob");
+ check("bobagem", "bobag");
+ check("bobagens", "bobagens");
+ check("bobalhões", "bobalho"); // removes diacritic: different from snowball portuguese
+ check("bobear", "bob");
+ check("bobeira", "bobeir");
+ check("bobinho", "bobinh");
+ check("bobinhos", "bobinh");
+ check("bobo", "bob");
+ check("bobs", "bobs");
+ check("boca", "boc");
+ check("bocadas", "boc");
+ check("bocadinho", "bocadinh");
+ check("bocado", "boc");
+ check("bocaiúva", "bocaiuv"); // removes diacritic: different from snowball portuguese
+ check("boçal", "bocal"); // removes diacritic: different from snowball portuguese
+ check("bocarra", "bocarr");
+ check("bocas", "boc");
+ check("bode", "bod");
+ check("bodoque", "bodoqu");
+ check("body", "body");
+ check("boeing", "boeing");
+ check("boem", "boem");
+ check("boemia", "boem");
+ check("boêmio", "boemi"); // removes diacritic: different from snowball portuguese
+ check("bogotá", "bogot");
+ check("boi", "boi");
+ check("bóia", "boi"); // removes diacritic: different from snowball portuguese
+ check("boiando", "boi");
+ check("quiabo", "quiab");
+ check("quicaram", "quic");
+ check("quickly", "quickly");
+ check("quieto", "quiet");
+ check("quietos", "quiet");
+ check("quilate", "quilat");
+ check("quilates", "quilat");
+ check("quilinhos", "quilinh");
+ check("quilo", "quil");
+ check("quilombo", "quilomb");
+ check("quilométricas", "quilometr"); // removes diacritic: different from snowball portuguese
+ check("quilométricos", "quilometr"); // removes diacritic: different from snowball portuguese
+ check("quilômetro", "quilometr"); // removes diacritic: different from snowball portuguese
+ check("quilômetros", "quilometr"); // removes diacritic: different from snowball portuguese
+ check("quilos", "quil");
+ check("quimica", "quimic");
+ check("quilos", "quil");
+ check("quimica", "quimic");
+ check("quimicas", "quimic");
+ check("quimico", "quimic");
+ check("quimicos", "quimic");
+ check("quimioterapia", "quimioterap");
+ check("quimioterápicos", "quimioterap"); // removes diacritic: different from snowball portuguese
+ check("quimono", "quimon");
+ check("quincas", "quinc");
+ check("quinhão", "quinha"); // removes diacritic: different from snowball portuguese
+ check("quinhentos", "quinhent");
+ check("quinn", "quinn");
+ check("quino", "quin");
+ check("quinta", "quint");
+ check("quintal", "quintal");
+ check("quintana", "quintan");
+ check("quintanilha", "quintanilh");
+ check("quintão", "quinta"); // removes diacritic: different from snowball portuguese
+ check("quintessência", "quintessente"); // versus snowball portuguese 'quintessent'
+ check("quintino", "quintin");
+ check("quinto", "quint");
+ check("quintos", "quint");
+ check("quintuplicou", "quintuplic");
+ check("quinze", "quinz");
+ check("quinzena", "quinzen");
+ check("quiosque", "quiosqu");
}
public void testNormalization() throws Exception {
@@ -175,4 +175,4 @@ public class TestBrazilianStemmer extend
};
checkOneTermReuse(a, "", "");
}
-}
\ No newline at end of file
+}