You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by "mohammad norouzi (JIRA)" <ji...@apache.org> on 2007/06/12 09:54:25 UTC
[jira] Commented: (LUCENE-530) Extend NumberTools to support
int/long/float/double to string
[ https://issues.apache.org/jira/browse/LUCENE-530?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#action_12503769 ]
mohammad norouzi commented on LUCENE-530:
-----------------------------------------
I just want to know: if this works fine, why hasn't it been added to Lucene's API? I need this now in my search engine program.
> Extend NumberTools to support int/long/float/double to string
> -------------------------------------------------------------
>
> Key: LUCENE-530
> URL: https://issues.apache.org/jira/browse/LUCENE-530
> Project: Lucene - Java
> Issue Type: Improvement
> Components: Analysis
> Affects Versions: 1.9
> Reporter: Andy Hind
> Priority: Minor
>
> Extend NumberTools to support int/long/float/double to string
> So you can search using range queries on int/long/float/double, if you want.
> Here is the basis for how NumberTools could be extended to support int/long/double/float.
> As I only write these values to the index and fix tokenisation in searches, I was not so fussed about the reverse transformations back from Strings.
/**
 * Encodes numeric primitives as fixed-width, lower-case hexadecimal strings
 * whose ordering under {@link String#compareTo(String)} matches the numeric
 * ordering of the encoded values. This lets numbers be written to an index as
 * plain terms and still be searched with range queries.
 *
 * <p>{@code int} and {@code float} values encode to 8 characters, {@code long}
 * and {@code double} values to 16 characters. Only encoding is provided; there
 * is no reverse transformation.
 *
 * <p>The class is a stateless static utility and cannot be instantiated.
 */
public final class NumericEncoder
{
    /*
     * Constants for integer encoding
     */

    /** Sign bit of a 32-bit two's-complement integer. */
    static final int INTEGER_SIGN_MASK = 0x80000000;

    /*
     * Constants for long encoding
     */

    /** Sign bit of a 64-bit two's-complement integer. */
    static final long LONG_SIGN_MASK = 0x8000000000000000L;

    /*
     * Constants for float encoding
     */

    /** Sign bit of the bit pattern returned by {@link Float#floatToIntBits(float)}. */
    static final int FLOAT_SIGN_MASK = 0x80000000;

    /** Exponent bits of the bit pattern returned by {@link Float#floatToIntBits(float)}. */
    static final int FLOAT_EXPONENT_MASK = 0x7F800000;

    /** Mantissa bits of the bit pattern returned by {@link Float#floatToIntBits(float)}. */
    static final int FLOAT_MANTISSA_MASK = 0x007FFFFF;

    /*
     * Constants for double encoding
     */

    /** Sign bit of the bit pattern returned by {@link Double#doubleToLongBits(double)}. */
    static final long DOUBLE_SIGN_MASK = 0x8000000000000000L;

    /** Exponent bits of the bit pattern returned by {@link Double#doubleToLongBits(double)}. */
    static final long DOUBLE_EXPONENT_MASK = 0x7FF0000000000000L;

    /** Mantissa bits of the bit pattern returned by {@link Double#doubleToLongBits(double)}. */
    static final long DOUBLE_MANTISSA_MASK = 0x000FFFFFFFFFFFFFL;

    /** Hexadecimal digits, indexed by nibble value. Lower case so that digit order equals char order. */
    private static final char[] DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e',
            'f' };

    /** Selects the low four bits (one hexadecimal digit). */
    private static final int MASK = (1 << 4) - 1;

    /** Number of hexadecimal digits in an encoded 32-bit value. */
    private static final int INT_HEX_WIDTH = 8;

    /** Number of hexadecimal digits in an encoded 64-bit value. */
    private static final int LONG_HEX_WIDTH = 16;

    /** Not instantiable: all members are static. */
    private NumericEncoder()
    {
    }

    /**
     * Encode an integer into a string that orders correctly using string
     * comparison. {@code Integer.MIN_VALUE} encodes as {@code 00000000},
     * {@code 0} as {@code 80000000} and {@code Integer.MAX_VALUE} as
     * {@code ffffffff}.
     *
     * @param intToEncode the value to encode
     * @return the 8-character lower-case hexadecimal encoding
     */
    public static String encode(int intToEncode)
    {
        // Flipping the sign bit maps the signed range onto the unsigned range
        // in the same order, so negatives sort before non-negatives.
        return encodeToHex(intToEncode ^ INTEGER_SIGN_MASK);
    }

    /**
     * Encode a long into a string that orders correctly using string
     * comparison. {@code Long.MIN_VALUE} encodes as {@code 0000000000000000},
     * {@code 0} as {@code 8000000000000000} and {@code Long.MAX_VALUE} as
     * {@code ffffffffffffffff}.
     *
     * @param longToEncode the value to encode
     * @return the 16-character lower-case hexadecimal encoding
     */
    public static String encode(long longToEncode)
    {
        // Same sign-bit flip as for int, applied to 64 bits.
        return encodeToHex(longToEncode ^ LONG_SIGN_MASK);
    }

    /**
     * Encode a float into a string that orders correctly according to string
     * comparison.
     *
     * <p>The bit pattern is taken from {@link Float#floatToIntBits(float)},
     * which collapses every NaN to a single canonical value; NaN therefore
     * always encodes as {@code ffc00000} and sorts after
     * {@code Float.POSITIVE_INFINITY} ({@code ff800000}). Negative zero
     * ({@code 7fffffff}) and positive zero ({@code 80000000}) receive distinct,
     * adjacent encodings.
     *
     * @param floatToEncode the value to encode
     * @return the 8-character lower-case hexadecimal encoding
     */
    public static String encode(float floatToEncode)
    {
        int bits = Float.floatToIntBits(floatToEncode);
        if ((bits & FLOAT_SIGN_MASK) != 0)
        {
            // For negative values a larger magnitude means a smaller number,
            // so invert exponent and mantissa to reverse their ordering.
            bits ^= FLOAT_EXPONENT_MASK | FLOAT_MANTISSA_MASK;
        }
        // Flip the sign bit so that negatives sort before non-negatives.
        return encodeToHex(bits ^ FLOAT_SIGN_MASK);
    }

    /**
     * Encode a double into a string that orders correctly according to string
     * comparison.
     *
     * <p>The bit pattern is taken from {@link Double#doubleToLongBits(double)},
     * which collapses every NaN to a single canonical value; NaN therefore
     * always encodes as {@code fff8000000000000} and sorts after
     * {@code Double.POSITIVE_INFINITY} ({@code fff0000000000000}). Negative
     * zero ({@code 7fffffffffffffff}) and positive zero
     * ({@code 8000000000000000}) receive distinct, adjacent encodings.
     *
     * @param doubleToEncode the value to encode
     * @return the 16-character lower-case hexadecimal encoding
     */
    public static String encode(double doubleToEncode)
    {
        long bits = Double.doubleToLongBits(doubleToEncode);
        if ((bits & DOUBLE_SIGN_MASK) != 0)
        {
            // For negative values a larger magnitude means a smaller number,
            // so invert exponent and mantissa to reverse their ordering.
            bits ^= DOUBLE_EXPONENT_MASK | DOUBLE_MANTISSA_MASK;
        }
        // Flip the sign bit so that negatives sort before non-negatives.
        return encodeToHex(bits ^ DOUBLE_SIGN_MASK);
    }

    /**
     * Format a 32-bit value as exactly 8 lower-case hexadecimal digits,
     * treating it as unsigned and padding with leading zeros.
     *
     * @param i the bits to format
     * @return the zero-padded hexadecimal string
     */
    private static String encodeToHex(int i)
    {
        char[] buf = new char[INT_HEX_WIDTH];
        // Fill from the least significant digit backwards; once the remaining
        // bits are zero the loop keeps writing '0', which yields the padding.
        for (int charPos = buf.length - 1; charPos >= 0; charPos--)
        {
            buf[charPos] = DIGITS[i & MASK];
            i >>>= 4;
        }
        return new String(buf);
    }

    /**
     * Format a 64-bit value as exactly 16 lower-case hexadecimal digits,
     * treating it as unsigned and padding with leading zeros.
     *
     * @param l the bits to format
     * @return the zero-padded hexadecimal string
     */
    private static String encodeToHex(long l)
    {
        char[] buf = new char[LONG_HEX_WIDTH];
        // Fill from the least significant digit backwards; once the remaining
        // bits are zero the loop keeps writing '0', which yields the padding.
        for (int charPos = buf.length - 1; charPos >= 0; charPos--)
        {
            buf[charPos] = DIGITS[(int) (l & MASK)];
            l >>>= 4;
        }
        return new String(buf);
    }
}
> public class NumericEncodingTest extends TestCase
> {
> public NumericEncodingTest()
> {
> super();
> }
> public NumericEncodingTest(String arg0)
> {
> super(arg0);
> }
> /**
> * Do an exhaustive test for integers
> *
> */
> public void xtestAllIntegerEncodings()
> {
> String lastString = null;
> String nextString = null;
> for (long i = Integer.MIN_VALUE; i <= Integer.MAX_VALUE; i++)
> {
> nextString = NumericEncoder.encode((int) i);
> if (lastString != null)
> {
> assertFalse(lastString.compareTo(nextString) > 0);
> }
> lastString = nextString;
> }
> }
> /**
> * Do an exhaustive test for float
> *
> */
> public void xtestAllFloatEncodings()
> {
> Float last = null;
> Float next = null;
> String lastString = null;
> String nextString = null;
> for (int sign = 1; sign >= 0; sign--)
> {
> if (sign == 0)
> {
> for (int exponent = 0; exponent <= 0xFF; exponent++)
> {
> for (int mantissa = 0; mantissa <= 0x007FFFFF; mantissa++)
> {
> int bitPattern = sign << 31 | exponent << 23 | mantissa;
> next = Float.intBitsToFloat(bitPattern);
> if (!next.equals(Float.NaN) && (last != null) && (last.compareTo(next) > 0))
> {
> System.err.println(last + " > " + next);
> }
> if (!next.equals(Float.NaN))
> {
> nextString = NumericEncoder.encode(next);
> if ((lastString != null) && (lastString.compareTo(nextString) > 0))
> {
> System.err.println(lastString + " > " + nextString);
> }
> lastString = nextString;
> }
> last = next;
> }
> }
> }
> else
> {
> for (int exponent = 0xFF; exponent >= 0; exponent--)
> {
> for (int mantissa = 0x007FFFFF; mantissa >= 0; mantissa--)
> {
> int bitPattern = sign << 31 | exponent << 23 | mantissa;
> next = Float.intBitsToFloat(bitPattern);
> if (!next.equals(Float.NaN) && (last != null) && (last.compareTo(next) > 0))
> {
> System.err.println(last + " > " + next);
> }
> if (!next.equals(Float.NaN))
> {
> nextString = NumericEncoder.encode(next);
> if ((lastString != null) && (lastString.compareTo(nextString) > 0))
> {
> System.err.println(lastString + " > " + nextString);
> }
> lastString = nextString;
> }
> last = next;
> }
> }
> }
> }
> }
> /*
> * Sample test for int
> */
> public void testIntegerEncoding()
> {
> assertEquals("00000000", NumericEncoder.encode(Integer.MIN_VALUE));
> assertEquals("00000001", NumericEncoder.encode(Integer.MIN_VALUE + 1));
> assertEquals("7fffffff", NumericEncoder.encode(-1));
> assertEquals("80000000", NumericEncoder.encode(0));
> assertEquals("80000001", NumericEncoder.encode(1));
> assertEquals("fffffffe", NumericEncoder.encode(Integer.MAX_VALUE - 1));
> assertEquals("ffffffff", NumericEncoder.encode(Integer.MAX_VALUE));
> }
> /*
> * Sample test for long
> */
> public void testLongEncoding()
> {
> assertEquals("0000000000000000", NumericEncoder.encode(Long.MIN_VALUE));
> assertEquals("0000000000000001", NumericEncoder.encode(Long.MIN_VALUE + 1));
> assertEquals("7fffffffffffffff", NumericEncoder.encode(-1L));
> assertEquals("8000000000000000", NumericEncoder.encode(0L));
> assertEquals("8000000000000001", NumericEncoder.encode(1L));
> assertEquals("fffffffffffffffe", NumericEncoder.encode(Long.MAX_VALUE - 1));
> assertEquals("ffffffffffffffff", NumericEncoder.encode(Long.MAX_VALUE));
> }
> /*
> * Sample test for float
> */
> public void testFloatEncoding()
> {
> assertEquals("007fffff", NumericEncoder.encode(Float.NEGATIVE_INFINITY));
> assertEquals("00800000", NumericEncoder.encode(-Float.MAX_VALUE));
> assertEquals("7ffffffe", NumericEncoder.encode(-Float.MIN_VALUE));
> assertEquals("7fffffff", NumericEncoder.encode(-0f));
> assertEquals("80000000", NumericEncoder.encode(0f));
> assertEquals("80000001", NumericEncoder.encode(Float.MIN_VALUE));
> assertEquals("ff7fffff", NumericEncoder.encode(Float.MAX_VALUE));
> assertEquals("ff800000", NumericEncoder.encode(Float.POSITIVE_INFINITY));
> assertEquals("ffc00000", NumericEncoder.encode(Float.NaN));
> }
> /*
> * Sample test for double
> */
> public void testDoubleEncoding()
> {
> assertEquals("000fffffffffffff", NumericEncoder.encode(Double.NEGATIVE_INFINITY));
> assertEquals("0010000000000000", NumericEncoder.encode(-Double.MAX_VALUE));
> assertEquals("7ffffffffffffffe", NumericEncoder.encode(-Double.MIN_VALUE));
> assertEquals("7fffffffffffffff", NumericEncoder.encode(-0d));
> assertEquals("8000000000000000", NumericEncoder.encode(0d));
> assertEquals("8000000000000001", NumericEncoder.encode(Double.MIN_VALUE));
> assertEquals("ffefffffffffffff", NumericEncoder.encode(Double.MAX_VALUE));
> assertEquals("fff0000000000000", NumericEncoder.encode(Double.POSITIVE_INFINITY));
> assertEquals("fff8000000000000", NumericEncoder.encode(Double.NaN));
> }
> }
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
---------------------------------------------------------------------
To unsubscribe, e-mail: java-dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-dev-help@lucene.apache.org