You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by gs...@apache.org on 2009/06/13 00:26:04 UTC
svn commit: r784297 - in /lucene/java/trunk: contrib/
contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/
contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/
src/java/org/apache/lucene/util/ src/test/org/apache/lucene/util/
Author: gsingers
Date: Fri Jun 12 22:26:01 2009
New Revision: 784297
URL: http://svn.apache.org/viewvc?rev=784297&view=rev
Log:
LUCENE-1676: in-stream payload support
Added:
lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/AbstractEncoder.java
lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java
lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/FloatEncoder.java
lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java
lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IntegerEncoder.java
lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadEncoder.java
lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
lucene/java/trunk/src/test/org/apache/lucene/util/ArrayUtilTest.java
Modified:
lucene/java/trunk/contrib/CHANGES.txt
lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java
lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java
Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=784297&r1=784296&r2=784297&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Fri Jun 12 22:26:01 2009
@@ -60,6 +60,9 @@
sentences properly. SmartChineseAnalyzer uses a Hidden Markov
Model to tokenize Chinese words in a more intelligent way.
(Xiaoping Gao via Mike McCandless)
+
+
+6. LUCENE-1676: Added DelimitedPayloadTokenFilter class for automatically adding payloads "in-stream" (Grant Ingersoll)
Optimizations
Added: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/AbstractEncoder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/AbstractEncoder.java?rev=784297&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/AbstractEncoder.java (added)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/AbstractEncoder.java Fri Jun 12 22:26:01 2009
@@ -0,0 +1,14 @@
+package org.apache.lucene.analysis.payloads;
+
+import org.apache.lucene.index.Payload;
+
+
+/**
+ * Base class for {@link PayloadEncoder} implementations. It supplies the
+ * whole-array <code>encode(char[])</code> overload by delegating to
+ * <code>encode(char[], int, int)</code>, so subclasses only have to implement
+ * the ranged variant.
+ **/
+public abstract class AbstractEncoder implements PayloadEncoder{
+ public Payload encode(char[] buffer) {
+ return encode(buffer, 0, buffer.length);
+ }
+}
Added: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java?rev=784297&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java (added)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java Fri Jun 12 22:26:01 2009
@@ -0,0 +1,109 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+
+import java.io.IOException;
+
+
+/**
+ * Splits each incoming term at the first occurrence of a delimiter character:
+ * the characters before the delimiter stay as the term, the characters after it
+ * are passed to a {@link PayloadEncoder} and stored as the token's payload.
+ * <p/>
+ * For example, if the delimiter is '|', then for the string "foo|bar", "foo" is the token
+ * and "bar" is the payload.
+ * <p/>
+ * The {@link org.apache.lucene.analysis.payloads.PayloadEncoder} decides how the payload
+ * characters are converted to bytes. Terms without a delimiter get a <code>null</code> payload.
+ * <p/>
+ * Note: make sure your Tokenizer doesn't split on the delimiter, or this won't work.
+ *
+ * @see PayloadEncoder
+ */
+public class DelimitedPayloadTokenFilter extends TokenFilter {
+  public static final char DEFAULT_DELIMITER = '|';
+  protected char delimiter = DEFAULT_DELIMITER;
+  protected TermAttribute termAtt;
+  protected PayloadAttribute payAtt;
+  protected PayloadEncoder encoder;
+
+  /**
+   * Construct a token stream filtering the given input, using
+   * {@link #DEFAULT_DELIMITER} and an {@link IdentityEncoder}.
+   *
+   * @param input the stream whose terms are split
+   */
+  protected DelimitedPayloadTokenFilter(TokenStream input) {
+    this(input, DEFAULT_DELIMITER, new IdentityEncoder());
+  }
+
+
+  /**
+   * Construct a token stream filtering the given input.
+   *
+   * @param input     the stream whose terms are split
+   * @param delimiter the character separating the term from its payload
+   * @param encoder   converts the characters after the delimiter into a payload
+   */
+  public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder) {
+    super(input);
+    termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+    payAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
+    this.delimiter = delimiter;
+    this.encoder = encoder;
+  }
+
+  /**
+   * Attribute-based API: advances the input and, if the term contains the
+   * delimiter, truncates the term there and sets the payload attribute from
+   * the remainder; otherwise sets the payload attribute to <code>null</code>.
+   *
+   * @return <code>false</code> once the input is exhausted, <code>true</code> otherwise
+   */
+  public boolean incrementToken() throws IOException {
+    if (!input.incrementToken()) {
+      return false;
+    }
+    final char[] buffer = termAtt.termBuffer();
+    final int length = termAtt.termLength();
+    final int split = indexOfDelimiter(buffer, length);
+    if (split < 0) {
+      //no delimiter: the term is left as is
+      payAtt.setPayload(null);
+    } else {
+      termAtt.setTermBuffer(buffer, 0, split);
+      payAtt.setPayload(encoder.encode(buffer, split + 1, length - (split + 1)));
+    }
+    return true;
+  }
+
+
+  /**
+   * Token-based API: same splitting as {@link #incrementToken()}, applied to
+   * the Token returned by the input.
+   *
+   * @param reusableToken token instance offered to the input for reuse
+   * @return the processed token, or <code>null</code> once the input is exhausted
+   */
+  public Token next(Token reusableToken) throws IOException {
+    final Token result = input.next(reusableToken);
+    if (result == null) {
+      return null;
+    }
+    final char[] buffer = result.termBuffer();
+    final int length = result.termLength();
+    final int split = indexOfDelimiter(buffer, length);
+    if (split < 0) {
+      //no delimiter: clear the payload on the returned token itself (not on payAtt,
+      //which belongs to the attribute-based API) so it cannot carry a stale payload
+      result.setPayload(null);
+    } else {
+      result.setTermBuffer(buffer, 0, split);
+      result.setPayload(encoder.encode(buffer, split + 1, length - (split + 1)));
+    }
+    return result;
+  }
+
+  /**
+   * Finds the first occurrence of the delimiter in the first <code>length</code>
+   * characters of <code>buffer</code>.
+   *
+   * @return the index of the delimiter, or -1 if it does not occur
+   */
+  private int indexOfDelimiter(char[] buffer, int length) {
+    for (int i = 0; i < length; i++) {
+      if (buffer[i] == delimiter) {
+        return i;
+      }
+    }
+    return -1;
+  }
+}
Added: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/FloatEncoder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/FloatEncoder.java?rev=784297&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/FloatEncoder.java (added)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/FloatEncoder.java Fri Jun 12 22:26:01 2009
@@ -0,0 +1,35 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Payload;
+
+
+/**
+ * Encodes the textual form of a float, held in a character array, as a
+ * {@link org.apache.lucene.index.Payload} via {@link PayloadHelper#encodeFloat(float)}.
+ *
+ **/
+public class FloatEncoder extends AbstractEncoder implements PayloadEncoder {
+
+  /**
+   * @param buffer characters containing the float's text
+   * @param offset index of the first character to parse
+   * @param length number of characters to parse
+   * @return a payload holding the encoded float
+   * @throws NumberFormatException if the characters are not a parsable float
+   */
+  public Payload encode(char[] buffer, int offset, int length) {
+    final Payload encoded = new Payload();
+    //TODO: parse straight from the char[] so that no temporary String is needed
+    final String text = new String(buffer, offset, length);
+    final float value = Float.parseFloat(text);
+    encoded.setData(PayloadHelper.encodeFloat(value));
+    return encoded;
+  }
+}
Added: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java?rev=784297&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java (added)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java Fri Jun 12 22:26:01 2009
@@ -0,0 +1,57 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Payload;
+
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.io.UnsupportedEncodingException;
+
+
+/**
+ * Does nothing other than convert the char array to a byte array using the specified encoding
+ * (UTF-8 unless a different {@link Charset} is passed to the constructor).
+ *
+ **/
+public class IdentityEncoder extends AbstractEncoder implements PayloadEncoder{
+
+  protected Charset charset = Charset.forName("UTF-8");
+  //the name is tracked alongside the Charset because encode() uses String.getBytes(String)
+  //(presumably for Java 1.4 compatibility, per the original "stupid 1.4" remark)
+  protected String charsetName = "UTF-8";
+
+  /** Creates an encoder that uses UTF-8. */
+  public IdentityEncoder() {
+  }
+
+  /**
+   * Creates an encoder that uses the given charset.
+   *
+   * @param charset the charset used to turn payload characters into bytes
+   */
+  public IdentityEncoder(Charset charset) {
+    this.charset = charset;
+    charsetName = charset.name();
+  }
+
+
+  /**
+   * Encodes <code>length</code> characters of <code>buffer</code>, starting at
+   * <code>offset</code>, into bytes and wraps them in a {@link Payload}.
+   *
+   * @param buffer the characters to encode
+   * @param offset index of the first character to encode
+   * @param length number of characters to encode
+   * @return a payload holding the encoded bytes; never <code>null</code>
+   * @throws RuntimeException if <code>charsetName</code> is not supported by the JVM
+   */
+  public Payload encode(char[] buffer, int offset, int length) {
+    //TODO: find a way to get from char[] to byte[] without the intermediate String
+    final String text = new String(buffer, offset, length);
+    try {
+      return new Payload(text.getBytes(charsetName));
+    } catch (UnsupportedEncodingException e) {
+      //Should not happen while the name comes from a Charset, but the field is protected
+      //and mutable; fail loudly instead of handing a null payload to the caller.
+      throw new RuntimeException("Charset " + charsetName + " is not supported", e);
+    }
+  }
+}
Added: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IntegerEncoder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IntegerEncoder.java?rev=784297&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IntegerEncoder.java (added)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/IntegerEncoder.java Fri Jun 12 22:26:01 2009
@@ -0,0 +1,36 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Payload;
+import org.apache.lucene.util.ArrayUtil;
+
+
+/**
+ * Encodes the textual form of an int, held in a character array, as a
+ * {@link org.apache.lucene.index.Payload}. The digits are parsed directly
+ * from the array with {@link ArrayUtil#parseInt(char[], int, int)}.
+ *
+ **/
+public class IntegerEncoder extends AbstractEncoder implements PayloadEncoder {
+
+  /**
+   * @param buffer characters containing the int's text
+   * @param offset index of the first character to parse
+   * @param length number of characters to parse
+   * @return a payload holding the encoded int
+   * @throws NumberFormatException if the characters are not a parsable int
+   */
+  public Payload encode(char[] buffer, int offset, int length) {
+    final Payload encoded = new Payload();
+    final int value = ArrayUtil.parseInt(buffer, offset, length);
+    encoded.setData(PayloadHelper.encodeInt(value));
+    return encoded;
+  }
+}
\ No newline at end of file
Added: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadEncoder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadEncoder.java?rev=784297&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadEncoder.java (added)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadEncoder.java Fri Jun 12 22:26:01 2009
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Payload;
+
+
+/**
+ * Converts char buffers to a {@link org.apache.lucene.index.Payload}; mainly
+ * for use with the DelimitedPayloadTokenFilter.
+ * <p/>
+ * NOTE: This interface is subject to change
+ *
+ **/
+public interface PayloadEncoder {
+
+ /**
+ * Convert a char array to a {@link org.apache.lucene.index.Payload}.
+ * AbstractEncoder implements this as <code>encode(buffer, 0, buffer.length)</code>.
+ * @param buffer the characters to convert
+ * @return the resulting payload
+ */
+ Payload encode(char[] buffer);
+
+ /**
+ * Convert a region of a char array to a {@link org.apache.lucene.index.Payload}
+ * @param buffer the array holding the characters to convert
+ * @param offset index of the first character to convert
+ * @param length number of characters to convert
+ * @return the resulting payload
+ */
+ Payload encode(char [] buffer, int offset, int length);
+}
Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java?rev=784297&r1=784296&r2=784297&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java Fri Jun 12 22:26:01 2009
@@ -31,6 +31,10 @@
return encodeInt(Float.floatToIntBits(payload), data, offset);
}
+ /**
+ * Encodes an int into a newly allocated 4-byte array by delegating to
+ * <code>encodeInt(payload, data, offset)</code> with offset 0.
+ *
+ * @param payload the value to encode
+ * @return the array returned by the three-argument overload
+ */
+ public static byte[] encodeInt(int payload){
+ return encodeInt(payload, new byte[4], 0);
+ }
+
public static byte[] encodeInt(int payload, byte[] data, int offset){
data[offset] = (byte)(payload >> 24);
data[offset + 1] = (byte)(payload >> 16);
Added: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java?rev=784297&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java (added)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java Fri Jun 12 22:26:01 2009
@@ -0,0 +1,139 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.index.Payload;
+import org.apache.lucene.util.LuceneTestCase;
+
+import java.io.StringReader;
+
+
+/**
+ * Tests {@link DelimitedPayloadTokenFilter} through both the attribute-based
+ * (<code>incrementToken</code>) and the Token-based (<code>next</code>) APIs,
+ * with the identity, float and integer encoders.
+ **/
+public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
+
+  public void testPayloads() throws Exception {
+    String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
+    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
+    TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class);
+    PayloadAttribute payAtt = (PayloadAttribute) filter.getAttribute(PayloadAttribute.class);
+    assertTermEquals("The", filter, termAtt, payAtt, null);
+    assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
+    assertTermEquals("red", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
+    assertTermEquals("fox", filter, termAtt, payAtt, "NN".getBytes("UTF-8"));
+    assertTermEquals("jumped", filter, termAtt, payAtt, "VB".getBytes("UTF-8"));
+    assertTermEquals("over", filter, termAtt, payAtt, null);
+    assertTermEquals("the", filter, termAtt, payAtt, null);
+    assertTermEquals("lazy", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
+    assertTermEquals("brown", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
+    assertTermEquals("dogs", filter, termAtt, payAtt, "NN".getBytes("UTF-8"));
+    assertFalse(filter.incrementToken());
+  }
+
+  public void testNext() throws Exception {
+
+    String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
+    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
+    assertTermEquals("The", filter, null);
+    assertTermEquals("quick", filter, "JJ".getBytes("UTF-8"));
+    assertTermEquals("red", filter, "JJ".getBytes("UTF-8"));
+    assertTermEquals("fox", filter, "NN".getBytes("UTF-8"));
+    assertTermEquals("jumped", filter, "VB".getBytes("UTF-8"));
+    assertTermEquals("over", filter, null);
+    assertTermEquals("the", filter, null);
+    assertTermEquals("lazy", filter, "JJ".getBytes("UTF-8"));
+    assertTermEquals("brown", filter, "JJ".getBytes("UTF-8"));
+    assertTermEquals("dogs", filter, "NN".getBytes("UTF-8"));
+    assertTrue(filter.next(new Token()) == null);
+  }
+
+
+  public void testFloatEncoding() throws Exception {
+    String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
+    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)), '|', new FloatEncoder());
+    TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class);
+    PayloadAttribute payAtt = (PayloadAttribute) filter.getAttribute(PayloadAttribute.class);
+    assertTermEquals("The", filter, termAtt, payAtt, null);
+    assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeFloat(1.0f));
+    assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeFloat(2.0f));
+    assertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.encodeFloat(3.5f));
+    assertTermEquals("jumped", filter, termAtt, payAtt, PayloadHelper.encodeFloat(0.5f));
+    assertTermEquals("over", filter, termAtt, payAtt, null);
+    assertTermEquals("the", filter, termAtt, payAtt, null);
+    assertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.encodeFloat(5.0f));
+    assertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.encodeFloat(99.3f));
+    assertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.encodeFloat(83.7f));
+    assertFalse(filter.incrementToken());
+  }
+
+  public void testIntEncoding() throws Exception {
+    String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
+    DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)), '|', new IntegerEncoder());
+    TermAttribute termAtt = (TermAttribute) filter.getAttribute(TermAttribute.class);
+    PayloadAttribute payAtt = (PayloadAttribute) filter.getAttribute(PayloadAttribute.class);
+    assertTermEquals("The", filter, termAtt, payAtt, null);
+    assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeInt(1));
+    assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeInt(2));
+    assertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.encodeInt(3));
+    assertTermEquals("jumped", filter, termAtt, payAtt, null);
+    assertTermEquals("over", filter, termAtt, payAtt, null);
+    assertTermEquals("the", filter, termAtt, payAtt, null);
+    assertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.encodeInt(5));
+    assertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.encodeInt(99));
+    assertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.encodeInt(83));
+    assertFalse(filter.incrementToken());
+  }
+
+  /**
+   * Pulls the next token through the Token-based API and checks its term and payload.
+   *
+   * @param expected  the expected term text
+   * @param stream    the stream under test
+   * @param expectPay the expected payload bytes, or null if no payload is expected
+   */
+  void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
+    Token tok = new Token();
+    assertTrue(stream.next(tok) != null);
+    assertEquals(expected, tok.term());
+    assertPayloadEquals(expectPay, tok.getPayload());
+  }
+
+
+  /**
+   * Pulls the next token through the attribute-based API and checks its term and payload.
+   *
+   * @param expected  the expected term text
+   * @param stream    the stream under test
+   * @param termAtt   the stream's term attribute
+   * @param payAtt    the stream's payload attribute
+   * @param expectPay the expected payload bytes, or null if no payload is expected
+   */
+  void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
+    assertTrue(stream.incrementToken());
+    assertEquals(expected, termAtt.term());
+    assertPayloadEquals(expectPay, payAtt.getPayload());
+  }
+
+  /**
+   * Compares a payload byte-for-byte with the expected bytes. Both sides may be
+   * null; a mismatch in nullness is reported as an assertion failure rather
+   * than surfacing as a NullPointerException.
+   */
+  private void assertPayloadEquals(byte[] expectPay, Payload payload) {
+    if (payload == null) {
+      assertTrue("no payload was produced but one was expected", expectPay == null);
+      return;
+    }
+    assertNotNull("a payload of length " + payload.length() + " was produced but none was expected", expectPay);
+    assertTrue(payload.length() + " does not equal: " + expectPay.length, payload.length() == expectPay.length);
+    for (int i = 0; i < expectPay.length; i++) {
+      assertTrue(expectPay[i] + " does not equal: " + payload.byteAt(i), expectPay[i] == payload.byteAt(i));
+    }
+  }
+}
Modified: lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java?rev=784297&r1=784296&r2=784297&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/util/ArrayUtil.java Fri Jun 12 22:26:01 2009
@@ -7,9 +7,9 @@
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -18,6 +18,107 @@
*/
public final class ArrayUtil {
+ /*
+ Begin Apache Harmony code
+
+ Revision taken on Friday, June 12. https://svn.apache.org/repos/asf/harmony/enhanced/classlib/archive/java6/modules/luni/src/main/java/java/lang/Integer.java
+
+ */
+
+ /**
+ * Parses an entire char array as a base-10 int by delegating to
+ * <code>parseInt(chars, 0, chars.length, 10)</code>. Note that
+ * <code>chars.length</code> is evaluated before delegating, so a null
+ * array fails with a NullPointerException rather than a NumberFormatException.
+ *
+ * @param chars the characters of an int quantity, e.g. <code>"-42".toCharArray()</code>
+ * @return int the value represented by the argument
+ * @throws NumberFormatException if the argument could not be parsed as an int quantity.
+ */
+ public static int parseInt(char[] chars) throws NumberFormatException {
+ return parseInt(chars, 0, chars.length, 10);
+ }
+
+ /**
+ * Parses a region of a char array into a base-10 int by delegating to
+ * <code>parseInt(chars, offset, len, 10)</code>.
+ * @param chars the character array
+ * @param offset The index of the first character to parse
+ * @param len The number of characters to parse
+ * @return the int
+ * @throws NumberFormatException if it can't parse
+ */
+ public static int parseInt(char[] chars, int offset, int len) throws NumberFormatException {
+ return parseInt(chars, offset, len, 10);
+ }
+
+ /**
+  * Parses a region of a char array as a signed int in the given radix.
+  * The region may start with a single '-'; every other character must be a
+  * digit valid in <code>radix</code>.
+  *
+  * @param chars  the array holding the characters of an int quantity.
+  * @param offset the index of the first character to parse.
+  * @param len    the number of characters to parse; must be positive.
+  * @param radix  the base to use for conversion.
+  * @return int the value represented by the region
+  * @throws NumberFormatException if <code>chars</code> is null, the radix is outside
+  *         [Character.MIN_RADIX, Character.MAX_RADIX], <code>len</code> is not positive,
+  *         or the region could not be parsed as an int quantity.
+  */
+ public static int parseInt(char[] chars, int offset, int len, int radix)
+     throws NumberFormatException {
+   if (chars == null || radix < Character.MIN_RADIX
+       || radix > Character.MAX_RADIX) {
+     throw new NumberFormatException();
+   }
+   if (len == 0) {
+     throw new NumberFormatException("chars length is 0");
+   }
+   if (len < 0) {
+     // Without this check the digit loop is skipped and 0 is returned silently.
+     throw new NumberFormatException("chars length is negative: " + len);
+   }
+   final boolean negative = chars[offset] == '-';
+   if (negative) {
+     if (len == 1) {
+       // a lone '-' carries no digits
+       throw new NumberFormatException("can't convert to an int");
+     }
+     // skip the sign; parse() only sees digits
+     offset++;
+     len--;
+   }
+   return parse(chars, offset, len, radix, negative);
+ }
+
+
+ /**
+  * Accumulates <code>len</code> digits starting at <code>offset</code>. The running
+  * value is kept negative so that Integer.MIN_VALUE, whose magnitude has no
+  * positive int counterpart, can be represented; it is negated at the end for
+  * non-negative input.
+  *
+  * @param negative whether the caller consumed a leading '-'
+  * @throws NumberFormatException on a non-digit character or on overflow
+  */
+ private static int parse(char[] chars, int offset, int len, int radix,
+                          boolean negative) throws NumberFormatException {
+   // smallest running value that can still be multiplied by radix without overflow
+   final int max = Integer.MIN_VALUE / radix;
+   int result = 0;
+   for (int i = 0; i < len; i++) {
+     final int digit = Character.digit(chars[i + offset], radix);
+     if (digit == -1) {
+       throw new NumberFormatException("Unable to parse");
+     }
+     if (max > result) {
+       // result * radix would overflow
+       throw new NumberFormatException("Unable to parse");
+     }
+     final int next = result * radix - digit;
+     if (next > result) {
+       // subtracting the digit wrapped around
+       throw new NumberFormatException("Unable to parse");
+     }
+     result = next;
+   }
+   if (!negative) {
+     result = -result;
+     if (result < 0) {
+       // the magnitude was that of Integer.MIN_VALUE, which has no positive counterpart
+       throw new NumberFormatException("Unable to parse");
+     }
+   }
+   return result;
+ }
+
+
+ /*
+
+ END APACHE HARMONY CODE
+ */
+
public static int getNextSize(int targetSize) {
/* This over-allocates proportional to the list size, making room
@@ -35,7 +136,7 @@
// Only reallocate if we are "substantially" smaller.
// This saves us from "running hot" (constantly making a
// bit bigger then a bit smaller, over and over):
- if (newSize < currentSize/2)
+ if (newSize < currentSize / 2)
return newSize;
else
return currentSize;
@@ -51,7 +152,7 @@
}
public static int[] grow(int[] array) {
- return grow(array, 1+array.length);
+ return grow(array, 1 + array.length);
}
public static int[] shrink(int[] array, int targetSize) {
@@ -74,7 +175,7 @@
}
public static long[] grow(long[] array) {
- return grow(array, 1+array.length);
+ return grow(array, 1 + array.length);
}
public static long[] shrink(long[] array, int targetSize) {
@@ -97,7 +198,7 @@
}
public static byte[] grow(byte[] array) {
- return grow(array, 1+array.length);
+ return grow(array, 1 + array.length);
}
public static byte[] shrink(byte[] array, int targetSize) {
@@ -110,21 +211,25 @@
return array;
}
- /** Returns hash of chars in range start (inclusive) to
- * end (inclusive) */
+ /**
+ * Returns hash of chars in range start (inclusive) to
+ * end (exclusive)
+ */
public static int hashCode(char[] array, int start, int end) {
int code = 0;
- for(int i=end-1;i>=start;i--)
- code = code*31 + array[i];
+ for (int i = end - 1; i >= start; i--)
+ code = code * 31 + array[i];
return code;
}
- /** Returns hash of chars in range start (inclusive) to
- * end (inclusive) */
+ /**
+ * Returns hash of bytes in range start (inclusive) to
+ * end (exclusive)
+ */
public static int hashCode(byte[] array, int start, int end) {
int code = 0;
- for(int i=end-1;i>=start;i--)
- code = code*31 + array[i];
+ for (int i = end - 1; i >= start; i--)
+ code = code * 31 + array[i];
return code;
}
}
Added: lucene/java/trunk/src/test/org/apache/lucene/util/ArrayUtilTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/util/ArrayUtilTest.java?rev=784297&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/util/ArrayUtilTest.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/util/ArrayUtilTest.java Fri Jun 12 22:26:01 2009
@@ -0,0 +1,57 @@
+package org.apache.lucene.util;
+
+import junit.framework.TestCase;
+
+
+/**
+ * Tests for {@link ArrayUtil}; currently covers the char[]-based parseInt overloads.
+ **/
+public class ArrayUtilTest extends TestCase {
+
+  public void testParseInt() throws Exception {
+    // inputs that must be rejected
+    assertParseFails("");
+    assertParseFails("foo");
+    assertParseFails(String.valueOf(Long.MAX_VALUE));
+    assertParseFails("0.34");
+
+    // inputs that must parse; an unexpected NumberFormatException propagates
+    // and fails the test with its own stack trace
+    assertEquals(1, ArrayUtil.parseInt("1".toCharArray()));
+    assertEquals(-10000, ArrayUtil.parseInt("-10000".toCharArray()));
+    assertEquals(1923, ArrayUtil.parseInt("1923".toCharArray()));
+    assertEquals(-1, ArrayUtil.parseInt("-1".toCharArray()));
+    assertEquals(1923, ArrayUtil.parseInt("foo 1923 bar".toCharArray(), 4, 4));
+  }
+
+  /** Asserts that parsing <code>input</code> throws a NumberFormatException. */
+  private static void assertParseFails(String input) {
+    try {
+      int parsed = ArrayUtil.parseInt(input.toCharArray());
+      fail("expected NumberFormatException for \"" + input + "\" but got " + parsed);
+    } catch (NumberFormatException expected) {
+      //expected
+    }
+  }
+
+}