You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/01/19 13:27:20 UTC
svn commit: r1060784 [2/2] - in /lucene/dev/branches/branch_3x: lucene/
lucene/contrib/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/
lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/config/
lucene/contrib/queryp...
Modified: lucene/dev/branches/branch_3x/solr/src/webapp/web/admin/analysis.jsp
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/webapp/web/admin/analysis.jsp?rev=1060784&r1=1060783&r2=1060784&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/webapp/web/admin/analysis.jsp (original)
+++ lucene/dev/branches/branch_3x/solr/src/webapp/web/admin/analysis.jsp Wed Jan 19 12:27:19 2011
@@ -23,6 +23,7 @@
org.apache.lucene.analysis.CharReader,
org.apache.lucene.analysis.CharStream,
org.apache.lucene.analysis.tokenattributes.*,
+ org.apache.lucene.util.AttributeReflector,
org.apache.solr.analysis.CharFilterFactory,
org.apache.solr.analysis.TokenFilterFactory,
org.apache.solr.analysis.TokenizerChain,
@@ -70,19 +71,19 @@
<table>
<tr>
<td>
- <strong>Field
+ <strong>Field
<select name="nt">
- <option <%= nt.equals("name") ? "selected=\"selected\"" : "" %> >name</option>
- <option <%= nt.equals("type") ? "selected=\"selected\"" : "" %>>type</option>
+ <option <%= nt.equals("name") ? "selected=\"selected\"" : "" %> >name</option>
+ <option <%= nt.equals("type") ? "selected=\"selected\"" : "" %>>type</option>
</select></strong>
</td>
<td>
- <input class="std" name="name" type="text" value="<% XML.escapeCharData(name, out); %>">
+ <input class="std" name="name" type="text" value="<% XML.escapeCharData(name, out); %>">
</td>
</tr>
<tr>
<td>
- <strong>Field value (Index)</strong>
+ <strong>Field value (Index)</strong>
<br/>
verbose output
<input name="verbose" type="checkbox"
@@ -93,19 +94,19 @@
<%= highlight ? "checked=\"true\"" : "" %> >
</td>
<td>
- <textarea class="std" rows="8" cols="70" name="val"><% XML.escapeCharData(val,out); %></textarea>
+ <textarea class="std" rows="8" cols="70" name="val"><% XML.escapeCharData(val,out); %></textarea>
</td>
</tr>
<tr>
<td>
- <strong>Field value (Query)</strong>
+ <strong>Field value (Query)</strong>
<br/>
verbose output
<input name="qverbose" type="checkbox"
<%= qverbose ? "checked=\"true\"" : "" %> >
</td>
<td>
- <textarea class="std" rows="1" cols="70" name="qval"><% XML.escapeCharData(qval,out); %></textarea>
+ <textarea class="std" rows="1" cols="70" name="qval"><% XML.escapeCharData(qval,out); %></textarea>
</td>
</tr>
<tr>
@@ -114,7 +115,7 @@
</td>
<td>
- <input class="stdbutton" type="submit" value="analyze">
+ <input class="stdbutton" type="submit" value="analyze">
</td>
</tr>
@@ -147,24 +148,26 @@
}
if (field!=null) {
- HashSet<Tok> matches = null;
+ HashSet<String> matches = null;
if (qval!="" && highlight) {
Reader reader = new StringReader(qval);
Analyzer analyzer = field.getType().getQueryAnalyzer();
TokenStream tstream = analyzer.reusableTokenStream(field.getName(),reader);
+ CharTermAttribute termAtt = tstream.addAttribute(CharTermAttribute.class);
tstream.reset();
- List<AttributeSource> tokens = getTokens(tstream);
- matches = new HashSet<Tok>();
- for (AttributeSource t : tokens) { matches.add( new Tok(t,0)); }
+ matches = new HashSet<String>();
+ while (tstream.incrementToken()) {
+ matches.add(termAtt.toString());
+ }
}
if (val!="") {
out.println("<h3>Index Analyzer</h3>");
- doAnalyzer(out, field, val, false, verbose,matches);
+ doAnalyzer(out, field, val, false, verbose, matches);
}
if (qval!="") {
out.println("<h3>Query Analyzer</h3>");
- doAnalyzer(out, field, qval, true, qverbose,null);
+ doAnalyzer(out, field, qval, true, qverbose, null);
}
}
@@ -176,7 +179,7 @@
<%!
- private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set<Tok> match) throws Exception {
+ private static void doAnalyzer(JspWriter out, SchemaField field, String val, boolean queryAnalyser, boolean verbose, Set<String> match) throws Exception {
FieldType ft = field.getType();
Analyzer analyzer = queryAnalyser ?
@@ -239,7 +242,7 @@
tstream.reset();
List<AttributeSource> tokens = getTokens(tstream);
if (verbose) {
- writeHeader(out, analyzer.getClass(), new HashMap<String,String>());
+ writeHeader(out, analyzer.getClass(), Collections.EMPTY_MAP);
}
writeTokens(out, tokens, ft, verbose, match);
}
@@ -248,45 +251,51 @@
static List<AttributeSource> getTokens(TokenStream tstream) throws IOException {
List<AttributeSource> tokens = new ArrayList<AttributeSource>();
-
- while (true) {
- if (!tstream.incrementToken())
- break;
- else {
- tokens.add(tstream.cloneAttributes());
- }
+ tstream.reset();
+ while (tstream.incrementToken()) {
+ tokens.add(tstream.cloneAttributes());
}
return tokens;
}
-
+ private static class ReflectItem {
+ final Class<? extends Attribute> attClass;
+ final String key;
+ final Object value;
+
+ ReflectItem(Class<? extends Attribute> attClass, String key, Object value) {
+ this.attClass = attClass;
+ this.key = key;
+ this.value = value;
+ }
+ }
+
private static class Tok {
- AttributeSource token;
- int pos;
+ final String term;
+ final int pos;
+ final List<ReflectItem> reflected = new ArrayList<ReflectItem>();
+
Tok(AttributeSource token, int pos) {
- this.token=token;
- this.pos=pos;
- }
-
- public boolean equals(Object o) {
- return ((Tok)o).token.toString().equals(token.toString());
- }
- public int hashCode() {
- return token.toString().hashCode();
- }
- public String toString() {
- return token.toString();
- }
- public String toPrintableString() {
- return token.addAttribute(CharTermAttribute.class).toString();
+ this.term = token.addAttribute(CharTermAttribute.class).toString();
+ this.pos = pos;
+ token.reflectWith(new AttributeReflector() {
+ public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
+ // leave out position and term
+ if (CharTermAttribute.class.isAssignableFrom(attClass))
+ return;
+ if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
+ return;
+ reflected.add(new ReflectItem(attClass, key, value));
+ }
+ });
}
}
- private static interface ToStr {
- public String toStr(Object o);
+ private static interface TokToStr {
+ public String toStr(Tok o);
}
- private static void printRow(JspWriter out, String header, List[] arrLst, ToStr converter, boolean multival, boolean verbose, Set<Tok> match) throws IOException {
+ private static void printRow(JspWriter out, String header, String headerTitle, List<Tok>[] arrLst, TokToStr converter, boolean multival, boolean verbose, Set<String> match) throws IOException {
// find the maximum number of terms for any position
int maxSz=1;
if (multival) {
@@ -300,7 +309,13 @@
out.println("<tr>");
if (idx==0 && verbose) {
if (header != null) {
- out.print("<th NOWRAP rowspan=\""+maxSz+"\">");
+ out.print("<th NOWRAP rowspan=\""+maxSz+"\"");
+ if (headerTitle != null) {
+ out.print(" title=\"");
+ XML.escapeCharData(headerTitle,out);
+ out.print("\"");
+ }
+ out.print(">");
XML.escapeCharData(header,out);
out.println("</th>");
}
@@ -309,7 +324,7 @@
for (int posIndex=0; posIndex<arrLst.length; posIndex++) {
List<Tok> lst = arrLst[posIndex];
if (lst.size() <= idx) continue;
- if (match!=null && match.contains(lst.get(idx))) {
+ if (match!=null && match.contains(lst.get(idx).term)) {
out.print("<td class=\"highlight\"");
} else {
out.print("<td class=\"debugdata\"");
@@ -332,14 +347,16 @@
}
+ /* this method is totally broken, as no charset involved: new String(byte[]) is crap!
static String isPayloadString( Payload p ) {
- String sp = new String( p.getData() );
- for( int i=0; i < sp.length(); i++ ) {
- if( !Character.isDefined( sp.charAt(i) ) || Character.isISOControl( sp.charAt(i) ) )
- return "";
- }
- return "(" + sp + ")";
+ String sp = new String(p.getData());
+ for( int i=0; i < sp.length(); i++ ) {
+ if( !Character.isDefined( sp.charAt(i) ) || Character.isISOControl( sp.charAt(i) ) )
+ return "";
+ }
+ return "(" + sp + ")";
}
+ */
static void writeHeader(JspWriter out, Class clazz, Map<String,String> args) throws IOException {
out.print("<h4>");
@@ -351,16 +368,17 @@
// readable, raw, pos, type, start/end
- static void writeTokens(JspWriter out, List<AttributeSource> tokens, final FieldType ft, boolean verbose, Set<Tok> match) throws IOException {
+ static void writeTokens(JspWriter out, List<AttributeSource> tokens, final FieldType ft, boolean verbose, Set<String> match) throws IOException {
// Use a map to tell what tokens are in what positions
// because some tokenizers/filters may do funky stuff with
// very large increments, or negative increments.
HashMap<Integer,List<Tok>> map = new HashMap<Integer,List<Tok>>();
boolean needRaw=false;
- int pos=0;
+ int pos=0, reflectionCount = -1;
for (AttributeSource t : tokens) {
- if (!t.toString().equals(ft.indexedToReadable(t.toString()))) {
+ String text = t.addAttribute(CharTermAttribute.class).toString();
+ if (!text.equals(ft.indexedToReadable(text))) {
needRaw=true;
}
@@ -371,117 +389,78 @@
map.put(pos,lst);
}
Tok tok = new Tok(t,pos);
+ // sanity check
+ if (reflectionCount < 0) {
+ reflectionCount = tok.reflected.size();
+ } else {
+ if (reflectionCount != tok.reflected.size())
+ throw new RuntimeException("Should not happen: Number of reflected entries differs for position=" + pos);
+ }
lst.add(tok);
}
List<Tok>[] arr = (List<Tok>[])map.values().toArray(new ArrayList[map.size()]);
- /* Jetty 6.1.3 miscompiles this generics version...
- Arrays.sort(arr, new Comparator<List<Tok>>() {
- public int compare(List<Tok> toks, List<Tok> toks1) {
- return toks.get(0).pos - toks1.get(0).pos;
- }
- }
- */
-
+ // Jetty 6.1.3 miscompiles a generics-enabled version..., without generics:
Arrays.sort(arr, new Comparator() {
public int compare(Object toks, Object toks1) {
return ((List<Tok>)toks).get(0).pos - ((List<Tok>)toks1).get(0).pos;
}
- }
-
-
- );
+ });
out.println("<table width=\"auto\" class=\"analysis\" border=\"1\">");
if (verbose) {
- printRow(out,"term position", arr, new ToStr() {
- public String toStr(Object o) {
- return Integer.toString(((Tok)o).pos);
+ printRow(out, "position", "calculated from " + PositionIncrementAttribute.class.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ return Integer.toString(t.pos);
}
- }
- ,false
- ,verbose
- ,null);
+ },false,verbose,null);
}
-
- printRow(out,"term text", arr, new ToStr() {
- public String toStr(Object o) {
- return ft.indexedToReadable( ((Tok)o).toPrintableString() );
+ printRow(out, "term", CharTermAttribute.class.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ return ft.indexedToReadable(t.term);
}
- }
- ,true
- ,verbose
- ,match
- );
+ },true,verbose,match);
- if (needRaw) {
- printRow(out,"raw text", arr, new ToStr() {
- public String toStr(Object o) {
+ if (needRaw && verbose) {
+ printRow(out, "raw term", CharTermAttribute.class.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
// page is UTF-8, so anything goes.
- return ((Tok)o).toPrintableString();
+ return t.term;
}
- }
- ,true
- ,verbose
- ,match
- );
+ },true,verbose,match);
}
if (verbose) {
- printRow(out,"term type", arr, new ToStr() {
- public String toStr(Object o) {
- String tt = ((Tok)o).token.addAttribute(TypeAttribute.class).type();
- if (tt == null) {
- return "null";
- } else {
- return tt;
- }
- }
- }
- ,true
- ,verbose,
- null
- );
- }
-
- if (verbose) {
- printRow(out,"source start,end", arr, new ToStr() {
- public String toStr(Object o) {
- AttributeSource t = ((Tok)o).token;
- return Integer.toString(t.addAttribute(OffsetAttribute.class).startOffset()) + ',' + t.addAttribute(OffsetAttribute.class).endOffset() ;
- }
- }
- ,true
- ,verbose
- ,null
- );
- }
-
- if (verbose) {
- printRow(out,"payload", arr, new ToStr() {
- public String toStr(Object o) {
- AttributeSource t = ((Tok)o).token;
- Payload p = t.addAttribute(PayloadAttribute.class).getPayload();
- if( null != p ) {
- BigInteger bi = new BigInteger( p.getData() );
- String ret = bi.toString( 16 );
- if (ret.length() % 2 != 0) {
- // Pad with 0
- ret = "0"+ret;
+ for (int att=0; att < reflectionCount; att++) {
+ final ReflectItem item0 = arr[0].get(0).reflected.get(att);
+ final int i = att;
+ printRow(out, item0.key, item0.attClass.getName(), arr, new TokToStr() {
+ public String toStr(Tok t) {
+ final ReflectItem item = t.reflected.get(i);
+ if (item0.attClass != item.attClass || !item0.key.equals(item.key))
+ throw new RuntimeException("Should not happen: attribute types suddenly change at position=" + t.pos);
+ if (item.value instanceof Payload) {
+ Payload p = (Payload) item.value;
+ if( null != p ) {
+ BigInteger bi = new BigInteger( p.getData() );
+ String ret = bi.toString( 16 );
+ if (ret.length() % 2 != 0) {
+ // Pad with 0
+ ret = "0"+ret;
+ }
+ //TODO maybe fix: ret += isPayloadString(p);
+ return ret;
+ }
+ return "";
+ } else {
+ return (item.value != null) ? item.value.toString() : "";
}
- ret += isPayloadString( p );
- return ret;
}
- return "";
- }
+ },true,verbose,null);
}
- ,true
- ,verbose
- ,null
- );
}
out.println("</table>");