You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ry...@apache.org on 2011/05/06 01:30:05 UTC
svn commit: r1099999 - in /lucene/dev/trunk:
modules/analysis/common/src/java/org/apache/lucene/analysis/path/
modules/analysis/common/src/test/org/apache/lucene/analysis/path/
solr/src/java/org/apache/solr/analysis/
Author: ryan
Date: Thu May 5 23:30:05 2011
New Revision: 1099999
URL: http://svn.apache.org/viewvc?rev=1099999&view=rev
Log:
LUCENE-3071: Add ReversePathHierarchyTokenizer and enable skip on PathHierarchyTokenizer
Added:
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java (with props)
lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java (with props)
Modified:
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java?rev=1099999&r1=1099998&r2=1099999&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java Thu May 5 23:30:05 2011
@@ -25,57 +25,71 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/**
- *
+ *
* Take something like:
- *
+ *
* <pre>
- * /soemthing/something/else
+ * /something/something/else
* </pre>
- *
+ *
* and make:
- *
+ *
* <pre>
- * /soemthing
- * /soemthing/something
- * /soemthing/something/else
+ * /something
+ * /something/something
+ * /something/something/else
* </pre>
- *
*/
public class PathHierarchyTokenizer extends Tokenizer {
public PathHierarchyTokenizer(Reader input) {
- this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER);
+ this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP);
+ }
+
+ public PathHierarchyTokenizer(Reader input, int skip) {
+ this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip);
}
public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) {
- this(input, bufferSize, delimiter, delimiter);
+ this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP);
}
public PathHierarchyTokenizer(Reader input, char delimiter, char replacement) {
- this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement);
+ this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP);
}
- public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement) {
+ public PathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) {
+ this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip);
+ }
+
+ public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) {
super(input);
termAtt.resizeBuffer(bufferSize);
+
this.delimiter = delimiter;
this.replacement = replacement;
- endDelimiter = false;
+ this.skip = skip;
resultToken = new StringBuilder(bufferSize);
}
-
+
private static final int DEFAULT_BUFFER_SIZE = 1024;
public static final char DEFAULT_DELIMITER = '/';
+ public static final int DEFAULT_SKIP = 0;
+
private final char delimiter;
private final char replacement;
-
+ private final int skip;
+
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
+ private int startPosition = 0;
private int finalOffset = 0;
- private boolean endDelimiter;
+ private int skipped = 0;
+ private boolean endDelimiter = false;
private StringBuilder resultToken;
+
@Override
public final boolean incrementToken() throws IOException {
clearAttributes();
@@ -97,43 +111,69 @@ public class PathHierarchyTokenizer exte
while (true) {
int c = input.read();
- if( c < 0 ) {
- length += resultToken.length();
- termAtt.setLength(length);
- finalOffset = correctOffset(length);
- offsetAtt.setOffset(correctOffset(0), finalOffset);
- if( added ){
- resultToken.setLength(0);
- resultToken.append(termAtt.buffer(), 0, length);
- }
- return added;
- }
- added = true;
- if( c == delimiter ) {
- if( length > 0 ){
- endDelimiter = true;
- break;
+ if( c < 0 ){
+ if( skipped > skip ) {
+ length += resultToken.length();
+ termAtt.setLength(length);
+ finalOffset = correctOffset(startPosition + length);
+ offsetAtt.setOffset(correctOffset(startPosition), finalOffset);
+ if( added ){
+ resultToken.setLength(0);
+ resultToken.append(termAtt.buffer(), 0, length);
+ }
+ return added;
}
else{
- termAtt.append(replacement);
+ finalOffset = correctOffset(startPosition + length);
+ return false;
+ }
+ }
+ if( !added ){
+ added = true;
+ skipped++;
+ if( skipped > skip ){
+ termAtt.append(c == delimiter ? replacement : (char)c);
length++;
}
+ else {
+ startPosition++;
+ }
}
else {
- termAtt.append((char)c);
- length++;
+ if( c == delimiter ){
+ if( skipped > skip ){
+ endDelimiter = true;
+ break;
+ }
+ skipped++;
+ if( skipped > skip ){
+ termAtt.append(replacement);
+ length++;
+ }
+ else {
+ startPosition++;
+ }
+ }
+ else {
+ if( skipped > skip ){
+ termAtt.append((char)c);
+ length++;
+ }
+ else {
+ startPosition++;
+ }
+ }
}
}
-
length += resultToken.length();
termAtt.setLength(length);
- finalOffset = correctOffset(length);
- offsetAtt.setOffset(correctOffset(0), finalOffset);
+ finalOffset = correctOffset(startPosition + length);
+ offsetAtt.setOffset(correctOffset(startPosition), finalOffset);
resultToken.setLength(0);
resultToken.append(termAtt.buffer(), 0, length);
return true;
}
-
+
@Override
public final void end() {
// set final offset
@@ -146,5 +186,6 @@ public class PathHierarchyTokenizer exte
resultToken.setLength(0);
finalOffset = 0;
endDelimiter = false;
+ skipped = 0;
}
}
Added: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java?rev=1099999&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java (added)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java Thu May 5 23:30:05 2011
@@ -0,0 +1,173 @@
+package org.apache.lucene.analysis.path;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+/**
+ * Tokenizer that emits every suffix of a delimited path, longest first.
+ *
+ * Take something like (here with <code>'.'</code> as the delimiter; the
+ * default delimiter is <code>'/'</code>):
+ *
+ * <pre>
+ * www.site.co.uk
+ * </pre>
+ *
+ * and make:
+ *
+ * <pre>
+ * www.site.co.uk
+ * site.co.uk
+ * co.uk
+ * uk
+ * </pre>
+ *
+ * The first token has a position increment of 1, every following token a
+ * position increment of 0. Each delimiter in the emitted terms is written as
+ * the configured replacement character.
+ * <p>
+ * With <code>skip &gt; 0</code> the last <code>skip</code> segments are cut
+ * off the end of every token (the delimiter preceding the cut is kept), and
+ * correspondingly fewer tokens are produced: <code>/a/b/c/</code> with
+ * <code>skip = 1</code> yields <code>/a/b/</code>, <code>a/b/</code> and
+ * <code>b/</code>. If <code>skip</code> is at least the number of segments,
+ * no token is produced.
+ */
+public class ReversePathHierarchyTokenizer extends Tokenizer {
+
+  public ReversePathHierarchyTokenizer(Reader input) {
+    this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP);
+  }
+
+  public ReversePathHierarchyTokenizer(Reader input, int skip) {
+    this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip);
+  }
+
+  public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) {
+    this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP);
+  }
+
+  public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement) {
+    this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP);
+  }
+
+  public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement) {
+    this(input, bufferSize, delimiter, replacement, DEFAULT_SKIP);
+  }
+
+  public ReversePathHierarchyTokenizer(Reader input, char delimiter, int skip) {
+    this(input, DEFAULT_BUFFER_SIZE, delimiter, delimiter, skip);
+  }
+
+  public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) {
+    this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip);
+  }
+
+  /**
+   * Creates a tokenizer with every option spelled out; all other
+   * constructors delegate here.
+   *
+   * @param input       source of the path text
+   * @param bufferSize  initial capacity used for the term and work buffers
+   * @param delimiter   character that separates path segments in the input
+   * @param replacement character written in place of each delimiter
+   * @param skip        number of trailing segments to leave out of every token
+   * @throws IllegalArgumentException if <code>skip</code> is negative
+   */
+  public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) {
+    super(input);
+    // A negative skip would index past the end of delimiterPositions on the
+    // first incrementToken() call; reject it up front with a clear message.
+    if (skip < 0) {
+      throw new IllegalArgumentException("skip cannot be negative: " + skip);
+    }
+    termAtt.resizeBuffer(bufferSize);
+    this.delimiter = delimiter;
+    this.replacement = replacement;
+    this.skip = skip;
+    resultToken = new StringBuilder(bufferSize);
+    resultTokenBuffer = new char[bufferSize];
+    delimiterPositions = new ArrayList<Integer>(bufferSize/10);
+  }
+
+  private static final int DEFAULT_BUFFER_SIZE = 1024;
+  public static final char DEFAULT_DELIMITER = '/';
+  public static final int DEFAULT_SKIP = 0;
+
+  private final char delimiter;
+  private final char replacement;
+  private final int skip;
+
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
+
+  // Exclusive end (in input characters) shared by every emitted token.
+  private int endPosition = 0;
+  // Corrected offset of the end of the input, reported by end().
+  private int finalOffset = 0;
+  // Index into delimiterPositions of the next token's start; doubles as the
+  // count of tokens already emitted.
+  private int skipped = 0;
+  // Scratch buffer the input is accumulated into while it is being read.
+  private StringBuilder resultToken;
+
+  // Candidate token boundaries: 0, the position just after each delimiter,
+  // and the input length when the input does not end with a delimiter.
+  private List<Integer> delimiterPositions;
+  // Number of entries in delimiterPositions; -1 until the input has been read.
+  private int delimitersCount = -1;
+  // The whole input with delimiters replaced; tokens are slices of this array.
+  private char[] resultTokenBuffer;
+
+  /**
+   * Emits the next suffix token. The first call reads the entire input and
+   * records the segment boundaries; later calls only slice the buffered text.
+   *
+   * @return <code>true</code> if a token was produced, <code>false</code>
+   *         once all tokens have been emitted
+   * @throws IOException if reading the input fails
+   */
+  @Override
+  public final boolean incrementToken() throws IOException {
+    clearAttributes();
+    if (delimitersCount == -1) {
+      int length = 0;
+      delimiterPositions.add(0);
+      while (true) {
+        int c = input.read();
+        if (c < 0) {
+          break;
+        }
+        length++;
+        if (c == delimiter) {
+          delimiterPositions.add(length);
+          resultToken.append(replacement);
+        }
+        else {
+          resultToken.append((char)c);
+        }
+      }
+      delimitersCount = delimiterPositions.size();
+      // Input not terminated by a delimiter: its end is one more boundary.
+      if (delimiterPositions.get(delimitersCount-1) < length) {
+        delimiterPositions.add(length);
+        delimitersCount++;
+      }
+      if (resultTokenBuffer.length < resultToken.length()) {
+        resultTokenBuffer = new char[resultToken.length()];
+      }
+      resultToken.getChars(0, resultToken.length(), resultTokenBuffer, 0);
+      resultToken.setLength(0);
+      // When skip exceeds the number of boundaries there is nothing to emit;
+      // guard the lookup so it does not fail with a negative index. The
+      // emission check below is then false and the stream is simply empty.
+      int endIndex = delimitersCount-1 - skip;
+      endPosition = endIndex >= 0 ? delimiterPositions.get(endIndex) : 0;
+      finalOffset = correctOffset(length);
+      posAtt.setPositionIncrement(1);
+    }
+    else {
+      posAtt.setPositionIncrement(0);
+    }
+
+    if (skipped < delimitersCount-skip-1) {
+      int start = delimiterPositions.get(skipped);
+      termAtt.copyBuffer(resultTokenBuffer, start, endPosition - start);
+      offsetAtt.setOffset(correctOffset(start), correctOffset(endPosition));
+      skipped++;
+      return true;
+    }
+
+    return false;
+  }
+
+  /** Reports the end of the input as both start and end offset. */
+  @Override
+  public final void end() {
+    // set final offset
+    offsetAtt.setOffset(finalOffset, finalOffset);
+  }
+
+  /**
+   * Prepares this tokenizer for a new input by clearing all per-stream state.
+   *
+   * @param input the new source of path text
+   * @throws IOException if the superclass reset fails
+   */
+  @Override
+  public void reset(Reader input) throws IOException {
+    super.reset(input);
+    resultToken.setLength(0);
+    endPosition = 0;
+    finalOffset = 0;
+    skipped = 0;
+    delimitersCount = -1;
+    delimiterPositions.clear();
+  }
+}
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java?rev=1099999&r1=1099998&r2=1099999&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java Thu May 5 23:30:05 2011
@@ -127,4 +127,70 @@ public class TestPathHierarchyTokenizer
new int[]{1, 0, 0, 0},
path.length());
}
+
+ /** Skipping one level of "/a/b/c" drops "/a" from every token. */
+ public void testBasicSkip() throws Exception {
+   final String input = "/a/b/c";
+   final String[] terms = {"/b", "/b/c"};
+   final int[] startOffsets = {2, 2};
+   final int[] endOffsets = {4, 6};
+   final int[] posIncrements = {1, 0};
+   assertTokenStreamContents(
+       new PathHierarchyTokenizer(new StringReader(input), 1),
+       terms, startOffsets, endOffsets, posIncrements, input.length());
+ }
+
+ /** A trailing delimiter still produces its own, longest token when skipping. */
+ public void testEndOfDelimiterSkip() throws Exception {
+   final String input = "/a/b/c/";
+   final String[] terms = {"/b", "/b/c", "/b/c/"};
+   final int[] startOffsets = {2, 2, 2};
+   final int[] endOffsets = {4, 6, 7};
+   final int[] posIncrements = {1, 0, 0};
+   assertTokenStreamContents(
+       new PathHierarchyTokenizer(new StringReader(input), 1),
+       terms, startOffsets, endOffsets, posIncrements, input.length());
+ }
+
+ /** Input without a leading delimiter: the first segment "a" is skipped. */
+ public void testStartOfCharSkip() throws Exception {
+   final String input = "a/b/c";
+   final String[] terms = {"/b", "/b/c"};
+   final int[] startOffsets = {1, 1};
+   final int[] endOffsets = {3, 5};
+   final int[] posIncrements = {1, 0};
+   assertTokenStreamContents(
+       new PathHierarchyTokenizer(new StringReader(input), 1),
+       terms, startOffsets, endOffsets, posIncrements, input.length());
+ }
+
+ /** No leading delimiter but a trailing one, with one level skipped. */
+ public void testStartOfCharEndOfDelimiterSkip() throws Exception {
+   final String input = "a/b/c/";
+   final String[] terms = {"/b", "/b/c", "/b/c/"};
+   final int[] startOffsets = {1, 1, 1};
+   final int[] endOffsets = {3, 5, 6};
+   final int[] posIncrements = {1, 0, 0};
+   assertTokenStreamContents(
+       new PathHierarchyTokenizer(new StringReader(input), 1),
+       terms, startOffsets, endOffsets, posIncrements, input.length());
+ }
+
+ /** A lone delimiter with one level skipped yields no tokens at all. */
+ public void testOnlyDelimiterSkip() throws Exception {
+   final String input = "/";
+   final String[] terms = {};
+   final int[] startOffsets = {};
+   final int[] endOffsets = {};
+   final int[] posIncrements = {};
+   assertTokenStreamContents(
+       new PathHierarchyTokenizer(new StringReader(input), 1),
+       terms, startOffsets, endOffsets, posIncrements, input.length());
+ }
+
+ /** Two delimiters with one level skipped leave only the second one. */
+ public void testOnlyDelimitersSkip() throws Exception {
+   final String input = "//";
+   final String[] terms = {"/"};
+   final int[] startOffsets = {1};
+   final int[] endOffsets = {2};
+   final int[] posIncrements = {1};
+   assertTokenStreamContents(
+       new PathHierarchyTokenizer(new StringReader(input), 1),
+       terms, startOffsets, endOffsets, posIncrements, input.length());
+ }
}
Added: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java?rev=1099999&view=auto
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java (added)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestReversePathHierarchyTokenizer.java Thu May 5 23:30:05 2011
@@ -0,0 +1,157 @@
+package org.apache.lucene.analysis.path;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+
+/**
+ * Checks the terms, offsets and position increments produced by
+ * ReversePathHierarchyTokenizer, with and without skipped trailing segments.
+ */
+public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase {
+
+  /** Leading delimiter, no trailing delimiter, default skip. */
+  public void testBasicReverse() throws Exception {
+    final String input = "/a/b/c";
+    final String[] terms = {"/a/b/c", "a/b/c", "b/c", "c"};
+    final int[] startOffsets = {0, 1, 3, 5};
+    final int[] endOffsets = {6, 6, 6, 6};
+    final int[] posIncrements = {1, 0, 0, 0};
+    assertTokenStreamContents(
+        new ReversePathHierarchyTokenizer(new StringReader(input)),
+        terms, startOffsets, endOffsets, posIncrements, input.length());
+  }
+
+  /** Leading and trailing delimiter, default skip. */
+  public void testEndOfDelimiterReverse() throws Exception {
+    final String input = "/a/b/c/";
+    final String[] terms = {"/a/b/c/", "a/b/c/", "b/c/", "c/"};
+    final int[] startOffsets = {0, 1, 3, 5};
+    final int[] endOffsets = {7, 7, 7, 7};
+    final int[] posIncrements = {1, 0, 0, 0};
+    assertTokenStreamContents(
+        new ReversePathHierarchyTokenizer(new StringReader(input)),
+        terms, startOffsets, endOffsets, posIncrements, input.length());
+  }
+
+  /** No leading or trailing delimiter, default skip. */
+  public void testStartOfCharReverse() throws Exception {
+    final String input = "a/b/c";
+    final String[] terms = {"a/b/c", "b/c", "c"};
+    final int[] startOffsets = {0, 2, 4};
+    final int[] endOffsets = {5, 5, 5};
+    final int[] posIncrements = {1, 0, 0};
+    assertTokenStreamContents(
+        new ReversePathHierarchyTokenizer(new StringReader(input)),
+        terms, startOffsets, endOffsets, posIncrements, input.length());
+  }
+
+  /** Trailing delimiter only, default skip. */
+  public void testStartOfCharEndOfDelimiterReverse() throws Exception {
+    final String input = "a/b/c/";
+    final String[] terms = {"a/b/c/", "b/c/", "c/"};
+    final int[] startOffsets = {0, 2, 4};
+    final int[] endOffsets = {6, 6, 6};
+    final int[] posIncrements = {1, 0, 0};
+    assertTokenStreamContents(
+        new ReversePathHierarchyTokenizer(new StringReader(input)),
+        terms, startOffsets, endOffsets, posIncrements, input.length());
+  }
+
+  /** A lone delimiter is returned as a single token. */
+  public void testOnlyDelimiterReverse() throws Exception {
+    final String input = "/";
+    final String[] terms = {"/"};
+    final int[] startOffsets = {0};
+    final int[] endOffsets = {1};
+    final int[] posIncrements = {1};
+    assertTokenStreamContents(
+        new ReversePathHierarchyTokenizer(new StringReader(input)),
+        terms, startOffsets, endOffsets, posIncrements, input.length());
+  }
+
+  /** Two consecutive delimiters give two tokens. */
+  public void testOnlyDelimitersReverse() throws Exception {
+    final String input = "//";
+    final String[] terms = {"//", "/"};
+    final int[] startOffsets = {0, 1};
+    final int[] endOffsets = {2, 2};
+    final int[] posIncrements = {1, 0};
+    assertTokenStreamContents(
+        new ReversePathHierarchyTokenizer(new StringReader(input)),
+        terms, startOffsets, endOffsets, posIncrements, input.length());
+  }
+
+  /** Skipping one trailing segment of a fully delimited path. */
+  public void testEndOfDelimiterReverseSkip() throws Exception {
+    final String input = "/a/b/c/";
+    final String[] terms = {"/a/b/", "a/b/", "b/"};
+    final int[] startOffsets = {0, 1, 3};
+    final int[] endOffsets = {5, 5, 5};
+    final int[] posIncrements = {1, 0, 0};
+    assertTokenStreamContents(
+        new ReversePathHierarchyTokenizer(new StringReader(input), 1),
+        terms, startOffsets, endOffsets, posIncrements, input.length());
+  }
+
+  /** Skipping one trailing segment when no delimiter surrounds the path. */
+  public void testStartOfCharReverseSkip() throws Exception {
+    final String input = "a/b/c";
+    final String[] terms = {"a/b/", "b/"};
+    final int[] startOffsets = {0, 2};
+    final int[] endOffsets = {4, 4};
+    final int[] posIncrements = {1, 0};
+    assertTokenStreamContents(
+        new ReversePathHierarchyTokenizer(new StringReader(input), 1),
+        terms, startOffsets, endOffsets, posIncrements, input.length());
+  }
+
+  /** Skipping one trailing segment when only a trailing delimiter is present. */
+  public void testStartOfCharEndOfDelimiterReverseSkip() throws Exception {
+    final String input = "a/b/c/";
+    final String[] terms = {"a/b/", "b/"};
+    final int[] startOffsets = {0, 2};
+    final int[] endOffsets = {4, 4};
+    final int[] posIncrements = {1, 0};
+    assertTokenStreamContents(
+        new ReversePathHierarchyTokenizer(new StringReader(input), 1),
+        terms, startOffsets, endOffsets, posIncrements, input.length());
+  }
+
+  /** A lone delimiter with one segment skipped yields no tokens. */
+  public void testOnlyDelimiterReverseSkip() throws Exception {
+    final String input = "/";
+    final String[] terms = {};
+    final int[] startOffsets = {};
+    final int[] endOffsets = {};
+    final int[] posIncrements = {};
+    assertTokenStreamContents(
+        new ReversePathHierarchyTokenizer(new StringReader(input), 1),
+        terms, startOffsets, endOffsets, posIncrements, input.length());
+  }
+
+  /** Two delimiters with one segment skipped leave only the first one. */
+  public void testOnlyDelimitersReverseSkip() throws Exception {
+    final String input = "//";
+    final String[] terms = {"/"};
+    final int[] startOffsets = {0};
+    final int[] endOffsets = {1};
+    final int[] posIncrements = {1};
+    assertTokenStreamContents(
+        new ReversePathHierarchyTokenizer(new StringReader(input), 1),
+        terms, startOffsets, endOffsets, posIncrements, input.length());
+  }
+
+  /** Skipping two trailing segments. */
+  public void testReverseSkip2() throws Exception {
+    final String input = "/a/b/c/";
+    final String[] terms = {"/a/", "a/"};
+    final int[] startOffsets = {0, 1};
+    final int[] endOffsets = {3, 3};
+    final int[] posIncrements = {1, 0};
+    assertTokenStreamContents(
+        new ReversePathHierarchyTokenizer(new StringReader(input), 2),
+        terms, startOffsets, endOffsets, posIncrements, input.length());
+  }
+}
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java?rev=1099999&r1=1099998&r2=1099999&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java Thu May 5 23:30:05 2011
@@ -21,6 +21,7 @@ import java.util.Map;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
+import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
/**
@@ -37,6 +38,8 @@ public class PathHierarchyTokenizerFacto
private char delimiter;
private char replacement;
+ private boolean reverse = false;
+ private int skip = PathHierarchyTokenizer.DEFAULT_SKIP;
/**
* Require a configured pattern
@@ -70,10 +73,23 @@ public class PathHierarchyTokenizerFacto
else{
replacement = delimiter;
}
+
+ v = args.get( "reverse" );
+ if( v != null ){
+ reverse = "true".equals( v );
+ }
+
+ v = args.get( "skip" );
+ if( v != null ){
+ skip = Integer.parseInt( v );
+ }
}
public Tokenizer create(Reader input) {
- return new PathHierarchyTokenizer(input, delimiter, replacement);
+ if( reverse ) {
+ return new ReversePathHierarchyTokenizer(input, delimiter, replacement, skip);
+ }
+ return new PathHierarchyTokenizer(input, delimiter, replacement, skip);
}
}