You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2016/03/01 18:07:13 UTC
[30/50] [abbrv] lucene-solr git commit: LUCENE-7015: Refactor spatial
module to spatial-extras
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/89db4950/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/Cell.java
----------------------------------------------------------------------
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/Cell.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/Cell.java
new file mode 100644
index 0000000..fe3846d
--- /dev/null
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/Cell.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.spatial.prefix.tree;
+
+import com.spatial4j.core.shape.Shape;
+import com.spatial4j.core.shape.SpatialRelation;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Represents a grid cell. Cell instances are generally very transient and may be re-used
+ * internally. To get an instance, you could start with {@link SpatialPrefixTree#getWorldCell()}.
+ * And from there you could either traverse down the tree with {@link #getNextLevelCells(com.spatial4j.core.shape.Shape)},
+ * or you could read an indexed term via {@link SpatialPrefixTree#readCell(org.apache.lucene.util.BytesRef,Cell)}.
+ * When a cell is read from a term, it is comprised of just the base bytes plus optionally a leaf flag.
+ *
+ * @lucene.experimental
+ */
+public interface Cell {
+
+// If we bring this back; perhaps do so as a method that un-shares its internal state: void unshare();
+// /** Resets the state of this cell such that it is identical to {@code source}. This can be used for
+// * cloning a cell to have a safe copy, and it also might be used to position this cell
+// * before calling {@link #readCell(org.apache.lucene.util.BytesRef)} in a loop if you know the first term
+// * is going to be close to some other cell, thereby saving some computations. */
+// void copyFrom(Cell source);
+
+ /** Gets the relationship this cell has with the shape from which it was filtered from, assuming it came from a
+ * {@link CellIterator}. Arguably it belongs there but it's very convenient here. */
+ SpatialRelation getShapeRel();
+
+ /** See {@link #getShapeRel()}.
+ * @lucene.internal */
+ void setShapeRel(SpatialRelation rel);
+
+ /**
+ * Some cells are flagged as leaves, which are indexed as such. A leaf cell is either within some
+ * shape or it both intersects and the cell is at an accuracy threshold such that no smaller cells
+ * for the shape will be represented.
+ */
+ boolean isLeaf();
+
+ /** Set this cell to be a leaf. Warning: never call on a cell
+ * initialized to reference the same bytes from termsEnum, which should be treated as immutable.
+ * Note: not supported at level 0.
+ * @lucene.internal */
+ void setLeaf();
+
+ /**
+ * Returns the bytes for this cell, with a leaf byte <em>if this is a leaf cell</em>.
+ * The result param is used to save object allocation, though its bytes aren't used.
+ * @param result where the result goes, or null to create new
+ */
+ BytesRef getTokenBytesWithLeaf(BytesRef result);
+
+ /**
+ * Returns the bytes for this cell, without a leaf set. The bytes should sort before
+ * {@link #getTokenBytesWithLeaf(org.apache.lucene.util.BytesRef)}.
+ * The result param is used to save object allocation, though its bytes aren't used.
+ * @param result where the result goes, or null to create new
+ */
+ BytesRef getTokenBytesNoLeaf(BytesRef result);
+
+ /** Level 0 is the world (and has no parent), from then on a higher level means a smaller
+ * cell than the level before it.
+ */
+ int getLevel();
+
+ /**
+ * Gets the cells at the next grid cell level underneath this one, optionally filtered by
+ * {@code shapeFilter}. The returned cells should have {@link #getShapeRel()} set to
+ * their relation with {@code shapeFilter}. In addition, for non-points {@link #isLeaf()}
+ * must be true when that relation is WITHIN.
+ * <p>
+ * IMPORTANT: Cells returned from this iterator can be shared, as well as the bytes.
+ * <p>
+ * Precondition: Never called when getLevel() == maxLevel.
+ *
+ * @param shapeFilter an optional filter for the returned cells.
+ * @return A set of cells (no dups), sorted. Not Modifiable.
+ */
+ CellIterator getNextLevelCells(Shape shapeFilter);
+
+ /** Gets the shape for this cell; typically a Rectangle. */
+ Shape getShape();
+
+ /**
+ * Returns if the target term is within/underneath this cell; not necessarily a direct
+ * descendant.
+ * @param c the term
+ */
+ boolean isPrefixOf(Cell c);
+
+ /** Equivalent to {@code this.getTokenBytesNoLeaf(null).compareTo(fromCell.getTokenBytesNoLeaf(null))}. */
+ int compareToNoLeaf(Cell fromCell);
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/89db4950/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/CellIterator.java
----------------------------------------------------------------------
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/CellIterator.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/CellIterator.java
new file mode 100644
index 0000000..1cef37a
--- /dev/null
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/CellIterator.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.spatial.prefix.tree;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * An Iterator of SpatialPrefixTree Cells. The order is always sorted without duplicates.
+ *
+ * @lucene.experimental
+ */
+public abstract class CellIterator implements Iterator<Cell> {
+
+ //note: nextCell or thisCell can be non-null but neither at the same time. That's
+ // because they might return the same instance when re-used!
+
+ protected Cell nextCell;//to be returned by next(), and null'ed after
+ protected Cell thisCell;//see next() & thisCell(). Should be cleared in hasNext().
+
+ /** Returns the cell last returned from {@link #next()}. It's cleared by hasNext(). */
+ public Cell thisCell() {
+ assert thisCell != null : "Only call thisCell() after next(), not hasNext()";
+ return thisCell;
+ }
+
+ // Arguably this belongs here and not on Cell
+ //public SpatialRelation getShapeRel()
+
+ /**
+ * Gets the next cell that is >= {@code fromCell}, compared using non-leaf bytes. If it returns null then
+ * the iterator is exhausted.
+ */
+ public Cell nextFrom(Cell fromCell) {
+ while (true) {
+ if (!hasNext())
+ return null;
+ Cell c = next();//will update thisCell
+ if (c.compareToNoLeaf(fromCell) >= 0) {
+ return c;
+ }
+ }
+ }
+
+ /** This prevents sub-cells (those underneath the current cell) from being iterated to,
+ * if applicable, otherwise a NO-OP. */
+ @Override
+ public void remove() {
+ assert thisCell != null;
+ }
+
+ @Override
+ public Cell next() {
+ if (nextCell == null) {
+ if (!hasNext())
+ throw new NoSuchElementException();
+ }
+ thisCell = nextCell;
+ nextCell = null;
+ return thisCell;
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/89db4950/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java
----------------------------------------------------------------------
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java
new file mode 100644
index 0000000..13281f3
--- /dev/null
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTree.java
@@ -0,0 +1,444 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.spatial.prefix.tree;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.GregorianCalendar;
+import java.util.Locale;
+import java.util.TimeZone;
+
+import com.spatial4j.core.shape.Shape;
+
+/**
+ * A PrefixTree for date ranges in which the levels of the tree occur at natural periods of time (e.g. years,
+ * months, ...). You pass in {@link Calendar} objects with the desired fields set and the unspecified
+ * fields unset, which conveys the precision. The implementation makes some optimization assumptions about a
+ * {@link java.util.GregorianCalendar}; others could probably be supported easily.
+ * <p>
+ * Warning: If you construct a Calendar and then get something from the object like a field (e.g. year) or
+ * milliseconds, then every field is fully set by side-effect. So after setting the fields, pass it to this
+ * API first.
+ * @lucene.experimental
+ */
+public class DateRangePrefixTree extends NumberRangePrefixTree {
+
+ /*
+ WARNING java.util.Calendar is tricky to work with:
+ * If you "get" any field value, every field becomes "set". This can introduce a Heisenbug effect,
+ when in a debugger in some cases. Fortunately, Calendar.toString() doesn't apply.
+ * Beware Calendar underflow of the underlying long. If you create a Calendar from LONG.MIN_VALUE, and clear
+ a field, it will underflow and appear close to LONG.MAX_VALUE (BC to AD).
+
+ There are no doubt other reasons but those two were hard fought lessons here.
+
+ TODO Improvements:
+ * Make max precision configurable (i.e. to SECOND).
+ * Make min & max year span configurable. Use that to remove pointless top levels of the SPT.
+ If year span is > 10k, then add 1k year level. If year span is > 10k of 1k levels, add 1M level.
+ * NumberRangePrefixTree: override getTreeCellIterator for optimized case where the shape isn't a date span; use
+ FilterCellIterator of the cell stack.
+
+ */
+
+ private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
+ private static Calendar CAL_TMP;//template
+ static {
+ CAL_TMP = Calendar.getInstance(UTC, Locale.ROOT);
+ CAL_TMP.clear();
+ }
+
+ private static final Calendar MINCAL = (Calendar) CAL_TMP.clone();
+ private static final Calendar MAXCAL = (Calendar) CAL_TMP.clone();
+ static {
+ MINCAL.setTimeInMillis(Long.MIN_VALUE);
+ MAXCAL.setTimeInMillis(Long.MAX_VALUE);
+ }
+ //BC years are decreasing, remember. Yet ActualMaximum is the numerically high value, ActualMinimum is 1.
+ private static final int BC_FIRSTYEAR = MINCAL.getActualMaximum(Calendar.YEAR);
+ private static final int BC_LASTYEAR = MINCAL.getActualMinimum(Calendar.YEAR);//1
+ private static final int BC_YEARS = BC_FIRSTYEAR - BC_LASTYEAR + 1;
+ private static final int AD_FIRSTYEAR = MAXCAL.getActualMinimum(Calendar.YEAR);//1
+ private static final int AD_LASTYEAR = MAXCAL.getActualMaximum(Calendar.YEAR);
+ private static final int AD_YEAR_BASE = (((BC_YEARS-1) / 1000_000)+1) * 1000_000;
+ static { assert BC_LASTYEAR == 1 && AD_FIRSTYEAR == 1; }
+
+ //how many million years are there?
+ private static final int NUM_MYEARS = (AD_YEAR_BASE + AD_LASTYEAR) / 1000_000;
+
+ private static int calFieldLen(int field) {
+ return CAL_TMP.getMaximum(field) - CAL_TMP.getMinimum(field) + 1;
+ }
+
+ private static final int[] FIELD_BY_LEVEL = {
+ -1/*unused*/, -1, -1, Calendar.YEAR, Calendar.MONTH, Calendar.DAY_OF_MONTH,
+ Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND, Calendar.MILLISECOND};
+ private static final int yearLevel = 3;
+
+ public static final DateRangePrefixTree INSTANCE = new DateRangePrefixTree();
+
+ private final UnitNRShape minLV, maxLV;
+ private final UnitNRShape gregorianChangeDateLV;
+
+ protected DateRangePrefixTree() {
+ super(new int[]{//sublevels by level
+ NUM_MYEARS,
+ 1000,//1 thousand thousand-years in a million years
+ 1000,//1 thousand years in a thousand-year
+ calFieldLen(Calendar.MONTH),
+ calFieldLen(Calendar.DAY_OF_MONTH),
+ calFieldLen(Calendar.HOUR_OF_DAY),
+ calFieldLen(Calendar.MINUTE),
+ calFieldLen(Calendar.SECOND),
+ calFieldLen(Calendar.MILLISECOND),
+ });
+ maxLV = toShape((Calendar)MAXCAL.clone());
+ minLV = toShape((Calendar)MINCAL.clone());
+ if (MAXCAL instanceof GregorianCalendar) {
+ //TODO this should be a configurable param by passing a Calendar serving as a template.
+ GregorianCalendar gCal = (GregorianCalendar)MAXCAL;
+ gregorianChangeDateLV = toUnitShape(gCal.getGregorianChange());
+ } else {
+ gregorianChangeDateLV = null;
+ }
+ }
+
+ @Override
+ public int getNumSubCells(UnitNRShape lv) {
+ int cmp = comparePrefix(lv, maxLV);
+ assert cmp <= 0;
+ if (cmp == 0)//edge case (literally!)
+ return maxLV.getValAtLevel(lv.getLevel()+1);
+
+ // if using GregorianCalendar and we're after the "Gregorian change date" then we'll compute
+ // the sub-cells ourselves more efficiently without the need to construct a Calendar.
+ cmp = gregorianChangeDateLV != null ? comparePrefix(lv, gregorianChangeDateLV) : -1;
+ //TODO consider also doing fast-path if field is <= hours even if before greg change date
+ if (cmp >= 0) {
+ int result = fastSubCells(lv);
+ assert result == slowSubCells(lv) : "fast/slow numSubCells inconsistency";
+ return result;
+ } else {
+ return slowSubCells(lv);
+ }
+ }
+
+ private int fastSubCells(UnitNRShape lv) {
+ if (lv.getLevel() == yearLevel+1) {//month
+ switch (lv.getValAtLevel(lv.getLevel())) {
+ case Calendar.SEPTEMBER:
+ case Calendar.APRIL:
+ case Calendar.JUNE:
+ case Calendar.NOVEMBER:
+ return 30;
+ case Calendar.FEBRUARY:
+ //get the year (negative numbers for BC)
+ int yearAdj = lv.getValAtLevel(1) * 1_000_000;
+ yearAdj += lv.getValAtLevel(2) * 1000;
+ yearAdj += lv.getValAtLevel(3);
+ int year = yearAdj - AD_YEAR_BASE;
+ if (year % 4 == 0 && !(year % 100 == 0 && year % 400 != 0) )//leap year
+ return 29;
+ else
+ return 28;
+ default:
+ return 31;
+ }
+ } else {//typical:
+ return super.getNumSubCells(lv);
+ }
+ }
+
+ private int slowSubCells(UnitNRShape lv) {
+ int field = FIELD_BY_LEVEL[lv.getLevel()+1];
+ //short-circuit optimization (GregorianCalendar assumptions)
+ if (field == -1 || field == Calendar.YEAR || field >= Calendar.HOUR_OF_DAY)//TODO make configurable
+ return super.getNumSubCells(lv);
+ Calendar cal = toCalendar(lv);//somewhat heavyweight op; ideally should be stored on UnitNRShape somehow
+ return cal.getActualMaximum(field) - cal.getActualMinimum(field) + 1;
+ }
+
+ /** Calendar utility method:
+ * Returns a new {@link Calendar} in UTC TimeZone, ROOT Locale, with all fields cleared. */
+ public Calendar newCal() {
+ return (Calendar) CAL_TMP.clone();
+ }
+
+ /** Calendar utility method:
+ * Returns the spatial prefix tree level for the corresponding {@link java.util.Calendar} field, such as
+ * {@link java.util.Calendar#YEAR}. If there's no match, the next greatest level is returned as a negative value.
+ */
+ public int getTreeLevelForCalendarField(int calField) {
+ for (int i = yearLevel; i < FIELD_BY_LEVEL.length; i++) {
+ if (FIELD_BY_LEVEL[i] == calField) {
+ return i;
+ } else if (FIELD_BY_LEVEL[i] > calField) {
+ return -1 * i;
+ }
+ }
+ throw new IllegalArgumentException("Bad calendar field?: " + calField);
+ }
+
+ /** Calendar utility method:
+ * Gets the Calendar field code of the last field that is set prior to an unset field. It only
+ * examines fields relevant to the prefix tree. If no fields are set, it returns -1. */
+ public int getCalPrecisionField(Calendar cal) {
+ int lastField = -1;
+ for (int level = yearLevel; level < FIELD_BY_LEVEL.length; level++) {
+ int field = FIELD_BY_LEVEL[level];
+ if (!cal.isSet(field))
+ break;
+ lastField = field;
+ }
+ return lastField;
+ }
+
+ /** Calendar utility method:
+ * Calls {@link Calendar#clear(int)} for every field after {@code field}. Beware of Calendar underflow. */
+ public void clearFieldsAfter(Calendar cal, int field) {
+ if (field == -1) {
+ cal.clear();
+ return;
+ }
+ int assertEra = -1;
+ assert (assertEra = (((Calendar)cal.clone()).get(Calendar.ERA))) >= 0;//a trick to only get this if assert enabled
+ for (int f = field+1; f < Calendar.FIELD_COUNT; f++) {
+ cal.clear(f);
+ }
+ assert ((Calendar)cal.clone()).get(Calendar.ERA) == assertEra : "Calendar underflow";
+ }
+
+ /** Converts {@code value} from a {@link Calendar} or {@link Date} to a {@link Shape}. Other arguments
+ * result in a {@link java.lang.IllegalArgumentException}.
+ */
+ @Override
+ public UnitNRShape toUnitShape(Object value) {
+ if (value instanceof Calendar) {
+ return toShape((Calendar) value);
+ } else if (value instanceof Date) {
+ Calendar cal = newCal();
+ cal.setTime((Date)value);
+ return toShape(cal);
+ }
+ throw new IllegalArgumentException("Expecting Calendar or Date but got: "+value.getClass());
+ }
+
+ /** Converts the Calendar into a Shape.
+ * The isSet() state of the Calendar is re-instated when done. */
+ public UnitNRShape toShape(Calendar cal) {
+ // Convert a Calendar into a stack of cell numbers
+ final int calPrecField = getCalPrecisionField(cal);//must call first; getters set all fields
+ try {
+ int[] valStack = new int[maxLevels];//starts at level 1, not 0
+ int len = 0;
+ if (calPrecField >= Calendar.YEAR) {//year or better precision
+ int year = cal.get(Calendar.YEAR);
+ int yearAdj = cal.get(Calendar.ERA) == 0 ? AD_YEAR_BASE - (year - 1) : AD_YEAR_BASE + year;
+
+ valStack[len++] = yearAdj / 1000_000;
+ yearAdj -= valStack[len-1] * 1000_000;
+ valStack[len++] = yearAdj / 1000;
+ yearAdj -= valStack[len-1] * 1000;
+ valStack[len++] = yearAdj;
+ for (int level = yearLevel+1; level < FIELD_BY_LEVEL.length; level++) {
+ int field = FIELD_BY_LEVEL[level];
+ if (field > calPrecField)
+ break;
+ valStack[len++] = cal.get(field) - cal.getActualMinimum(field);
+ }
+ }
+
+ return toShape(valStack, len);
+ } finally {
+ clearFieldsAfter(cal, calPrecField);//restore precision state modified by get()
+ }
+ }
+
+ /** Calls {@link #toCalendar(org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape)}. */
+ @Override
+ public Object toObject(UnitNRShape shape) {
+ return toCalendar(shape);
+ }
+
+ /** Converts the {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape} shape to a
+ * corresponding Calendar that is cleared below its level. */
+ public Calendar toCalendar(UnitNRShape lv) {
+ if (lv.getLevel() == 0)
+ return newCal();
+ if (comparePrefix(lv, minLV) <= 0) {//shouldn't typically happen; sometimes in a debugger
+ return (Calendar) MINCAL.clone();//full precision; truncation would cause underflow
+ }
+ assert comparePrefix(lv, maxLV) <= 0;
+ Calendar cal = newCal();
+
+ int yearAdj = lv.getValAtLevel(1) * 1_000_000;
+ if (lv.getLevel() > 1) {
+ yearAdj += lv.getValAtLevel(2) * 1000;
+ if (lv.getLevel() > 2) {
+ yearAdj += lv.getValAtLevel(3);
+ }
+ }
+ if (yearAdj > AD_YEAR_BASE) {
+ cal.set(Calendar.ERA, 1);
+ cal.set(Calendar.YEAR, yearAdj - AD_YEAR_BASE);//setting the year resets the era
+ } else {
+ cal.set(Calendar.ERA, 0);//we assert this "sticks" at the end
+ cal.set(Calendar.YEAR, (AD_YEAR_BASE - yearAdj) + 1);
+ }
+ for (int level = yearLevel+1; level <= lv.getLevel(); level++) {
+ int field = FIELD_BY_LEVEL[level];
+ cal.set(field, lv.getValAtLevel(level) + cal.getActualMinimum(field));
+ }
+ assert yearAdj > AD_YEAR_BASE || ((Calendar)cal.clone()).get(Calendar.ERA) == 0 : "ERA / YEAR underflow";
+ return cal;
+ }
+
+ @Override
+ protected String toString(UnitNRShape lv) {
+ return toString(toCalendar(lv));
+ }
+
+ /** Calendar utility method:
+ * Formats the calendar to ISO-8601 format, to include proper BC handling (1BC is "0000", 2BC is "-0001", etc.);
+ * and WITHOUT a trailing 'Z'.
+ * A fully cleared calendar will yield the string "*".
+ * The isSet() state of the Calendar is re-instated when done. */
+ @SuppressWarnings("fallthrough")
+ public String toString(Calendar cal) {
+ final int calPrecField = getCalPrecisionField(cal);//must call first; getters set all fields
+ if (calPrecField == -1)
+ return "*";
+ try {
+ //TODO not fully optimized; but it's at least not used in 'search'.
+ //TODO maybe borrow code from Solr DateUtil (put in Lucene util somewhere), and have it reference this back?
+ String pattern = "yyyy-MM-dd'T'HH:mm:ss.SSS";
+ int ptnLen = 0;
+ switch (calPrecField) {//switch fall-through is deliberate
+ case Calendar.MILLISECOND: ptnLen += 4;
+ case Calendar.SECOND: ptnLen += 3;
+ case Calendar.MINUTE: ptnLen += 3;
+ case Calendar.HOUR_OF_DAY: ptnLen += 5;
+ case Calendar.DAY_OF_MONTH: ptnLen += 3;
+ case Calendar.MONTH: ptnLen += 3;
+ case Calendar.YEAR: ptnLen += 4;
+ break;
+ default: throw new IllegalStateException(""+calPrecField);
+ }
+ pattern = pattern.substring(0, ptnLen);
+ SimpleDateFormat format = new SimpleDateFormat(pattern, Locale.ROOT);
+ format.setTimeZone(cal.getTimeZone());
+ if (cal.get(Calendar.ERA) == 0) {//BC
+ //SDF doesn't do this properly according to ISO-8601
+ // Example: 1BC == "0000" (actually 0 AD), 2BC == "-0001", 3BC == "-0002", ...
+ final int yearOrig = cal.get(Calendar.YEAR);
+ cal.set(Calendar.YEAR, yearOrig-1);
+ String str;
+ try {
+ str = format.format(cal.getTime());
+ } finally {
+ //reset to what it was
+ cal.set(Calendar.ERA, 0);//necessary!
+ cal.set(Calendar.YEAR, yearOrig);
+ }
+ if (yearOrig > 1)
+ return "-" + str;
+ else
+ return "0000" + str.substring(4);
+ }
+ return format.format(cal.getTime());
+ } finally {
+ clearFieldsAfter(cal, calPrecField);//restore precision state modified by get()
+ }
+ }
+
+ @Override
+ protected UnitNRShape parseUnitShape(String str) throws ParseException {
+ return toShape(parseCalendar(str));
+ }
+
+ /** Calendar utility method:
+ * The reverse of {@link #toString(java.util.Calendar)}. It will only set the fields found, leaving
+ * the remainder in an un-set state. A leading '-' or '+' is optional (positive assumed), and a
+ * trailing 'Z' is also optional.
+ * @param str not null and not empty
+ * @return not null
+ */
+ public Calendar parseCalendar(String str) throws ParseException {
+ // example: +2014-10-23T21:22:33.159Z
+ if (str == null || str.isEmpty())
+ throw new IllegalArgumentException("str is null or blank");
+ Calendar cal = newCal();
+ if (str.equals("*"))
+ return cal;
+ int offset = 0;//a pointer
+ try {
+ //year & era:
+ int lastOffset = str.charAt(str.length()-1) == 'Z' ? str.length() - 1 : str.length();
+ int hyphenIdx = str.indexOf('-', 1);//look past possible leading hyphen
+ if (hyphenIdx < 0)
+ hyphenIdx = lastOffset;
+ int year = Integer.parseInt(str.substring(offset, hyphenIdx));
+ cal.set(Calendar.ERA, year <= 0 ? 0 : 1);
+ cal.set(Calendar.YEAR, year <= 0 ? -1*year + 1 : year);
+ offset = hyphenIdx + 1;
+ if (lastOffset < offset)
+ return cal;
+
+ //NOTE: We aren't validating separator chars, and we unintentionally accept leading +/-.
+ // The str.substring()'s hopefully get optimized to be stack-allocated.
+
+ //month:
+ cal.set(Calendar.MONTH, Integer.parseInt(str.substring(offset, offset+2)) - 1);//starts at 0
+ offset += 3;
+ if (lastOffset < offset)
+ return cal;
+ //day:
+ cal.set(Calendar.DAY_OF_MONTH, Integer.parseInt(str.substring(offset, offset+2)));
+ offset += 3;
+ if (lastOffset < offset)
+ return cal;
+ //hour:
+ cal.set(Calendar.HOUR_OF_DAY, Integer.parseInt(str.substring(offset, offset+2)));
+ offset += 3;
+ if (lastOffset < offset)
+ return cal;
+ //minute:
+ cal.set(Calendar.MINUTE, Integer.parseInt(str.substring(offset, offset+2)));
+ offset += 3;
+ if (lastOffset < offset)
+ return cal;
+ //second:
+ cal.set(Calendar.SECOND, Integer.parseInt(str.substring(offset, offset+2)));
+ offset += 3;
+ if (lastOffset < offset)
+ return cal;
+ //ms:
+ cal.set(Calendar.MILLISECOND, Integer.parseInt(str.substring(offset, offset+3)));
+ offset += 3;//last one, move to next char
+ if (lastOffset == offset)
+ return cal;
+ } catch (Exception e) {
+ ParseException pe = new ParseException("Improperly formatted date: "+str, offset);
+ pe.initCause(e);
+ throw pe;
+ }
+ throw new ParseException("Improperly formatted date: "+str, offset);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/89db4950/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/FilterCellIterator.java
----------------------------------------------------------------------
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/FilterCellIterator.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/FilterCellIterator.java
new file mode 100644
index 0000000..e4f50e0
--- /dev/null
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/FilterCellIterator.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.spatial.prefix.tree;
+
+import com.spatial4j.core.shape.Shape;
+import com.spatial4j.core.shape.SpatialRelation;
+
+import java.util.Iterator;
+
+/**
+ * A filtering iterator of Cells. Those not matching the provided shape (disjoint) are
+ * skipped. If {@code shapeFilter} is null then all cells are returned.
+ *
+ * @lucene.internal
+ */
+class FilterCellIterator extends CellIterator {
+ final Iterator<Cell> baseIter;
+ final Shape shapeFilter;
+
+ FilterCellIterator(Iterator<Cell> baseIter, Shape shapeFilter) {
+ this.baseIter = baseIter;
+ this.shapeFilter = shapeFilter;
+ }
+
+ @Override
+ public boolean hasNext() {
+ thisCell = null;
+ if (nextCell != null)//calling hasNext twice in a row
+ return true;
+ while (baseIter.hasNext()) {
+ nextCell = baseIter.next();
+ if (shapeFilter == null) {
+ return true;
+ } else {
+ SpatialRelation rel = nextCell.getShape().relate(shapeFilter);
+ if (rel.intersects()) {
+ nextCell.setShapeRel(rel);
+ if (rel == SpatialRelation.WITHIN)
+ nextCell.setLeaf();
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/89db4950/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java
----------------------------------------------------------------------
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java
new file mode 100644
index 0000000..fa4e987
--- /dev/null
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/GeohashPrefixTree.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.spatial.prefix.tree;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import com.spatial4j.core.context.SpatialContext;
+import com.spatial4j.core.io.GeohashUtils;
+import com.spatial4j.core.shape.Point;
+import com.spatial4j.core.shape.Rectangle;
+import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * A {@link SpatialPrefixTree} based on
+ * <a href="http://en.wikipedia.org/wiki/Geohash">Geohashes</a>.
+ * Uses {@link GeohashUtils} to do all the geohash work.
+ *
+ * @lucene.experimental
+ */
+public class GeohashPrefixTree extends LegacyPrefixTree {
+
+ /**
+ * Factory for creating {@link GeohashPrefixTree} instances with useful defaults
+ */
+ public static class Factory extends SpatialPrefixTreeFactory {
+
+ @Override
+ protected int getLevelForDistance(double degrees) {
+ GeohashPrefixTree grid = new GeohashPrefixTree(ctx, GeohashPrefixTree.getMaxLevelsPossible());
+ return grid.getLevelForDistance(degrees);
+ }
+
+ @Override
+ protected SpatialPrefixTree newSPT() {
+ return new GeohashPrefixTree(ctx,
+ maxLevels != null ? maxLevels : GeohashPrefixTree.getMaxLevelsPossible());
+ }
+ }
+
+ public GeohashPrefixTree(SpatialContext ctx, int maxLevels) {
+ super(ctx, maxLevels);
+ Rectangle bounds = ctx.getWorldBounds();
+ if (bounds.getMinX() != -180)
+ throw new IllegalArgumentException("Geohash only supports lat-lon world bounds. Got "+bounds);
+ int MAXP = getMaxLevelsPossible();
+ if (maxLevels <= 0 || maxLevels > MAXP)
+ throw new IllegalArgumentException("maxLevels must be [1-"+MAXP+"] but got "+ maxLevels);
+ }
+
+ /** Any more than this and there's no point (double lat and lon are the same). */
+ public static int getMaxLevelsPossible() {
+ return GeohashUtils.MAX_PRECISION;
+ }
+
+ @Override
+ public Cell getWorldCell() {
+ return new GhCell(BytesRef.EMPTY_BYTES, 0, 0);
+ }
+
+ @Override
+ public int getLevelForDistance(double dist) {
+ if (dist == 0)
+ return maxLevels;//short circuit
+ final int level = GeohashUtils.lookupHashLenForWidthHeight(dist, dist);
+ return Math.max(Math.min(level, maxLevels), 1);
+ }
+
+ @Override
+ protected Cell getCell(Point p, int level) {
+ return new GhCell(GeohashUtils.encodeLatLon(p.getY(), p.getX(), level));//args are lat,lon (y,x)
+ }
+
+ private static byte[] stringToBytesPlus1(String token) {
+ //copy ASCII token to byte array with one extra spot for eventual LEAF_BYTE if needed
+ byte[] bytes = new byte[token.length() + 1];
+ for (int i = 0; i < token.length(); i++) {
+ bytes[i] = (byte) token.charAt(i);
+ }
+ return bytes;
+ }
+
+ private class GhCell extends LegacyCell {
+
+ private String geohash;//cache; never has leaf byte, simply a geohash
+
+ GhCell(String geohash) {
+ super(stringToBytesPlus1(geohash), 0, geohash.length());
+ this.geohash = geohash;
+ if (isLeaf() && getLevel() < getMaxLevels())//we don't have a leaf byte at max levels (an opt)
+ this.geohash = geohash.substring(0, geohash.length() - 1);
+ }
+
+ GhCell(byte[] bytes, int off, int len) {
+ super(bytes, off, len);
+ }
+
+ @Override
+ protected GeohashPrefixTree getGrid() { return GeohashPrefixTree.this; }
+
+ @Override
+ protected int getMaxLevels() { return maxLevels; }
+
+ @Override
+ protected void readCell(BytesRef bytesRef) {
+ super.readCell(bytesRef);
+ geohash = null;
+ }
+
+ @Override
+ public Collection<Cell> getSubCells() {
+ String[] hashes = GeohashUtils.getSubGeohashes(getGeohash());//sorted
+ List<Cell> cells = new ArrayList<>(hashes.length);
+ for (String hash : hashes) {
+ cells.add(new GhCell(hash));
+ }
+ return cells;
+ }
+
+ @Override
+ public int getSubCellsSize() {
+ return 32;//8x4
+ }
+
+ @Override
+ protected GhCell getSubCell(Point p) {
+ return (GhCell) getGrid().getCell(p, getLevel() + 1);//not performant!
+ }
+
+ @Override
+ public Shape getShape() {
+ if (shape == null) {
+ shape = GeohashUtils.decodeBoundary(getGeohash(), getGrid().getSpatialContext());
+ }
+ return shape;
+ }
+
+ private String getGeohash() {
+ if (geohash == null)
+ geohash = getTokenBytesNoLeaf(null).utf8ToString();
+ return geohash;
+ }
+
+ }//class GhCell
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/89db4950/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java
----------------------------------------------------------------------
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java
new file mode 100644
index 0000000..27c56a7
--- /dev/null
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/LegacyCell.java
@@ -0,0 +1,242 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.spatial.prefix.tree;
+
+import java.util.Collection;
+
+import com.spatial4j.core.shape.Point;
+import com.spatial4j.core.shape.Shape;
+import com.spatial4j.core.shape.SpatialRelation;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.StringHelper;
+
+/** The base for the original two SPT's: Geohash and Quad. Don't subclass this for new SPTs.
+ * @lucene.internal */
+//public for RPT pruneLeafyBranches code
+public abstract class LegacyCell implements Cell {
+
+ // Important: A LegacyCell doesn't share state for getNextLevelCells(), and
+ // LegacySpatialPrefixTree assumes this in its simplify tree logic.
+
+ private static final byte LEAF_BYTE = '+';//NOTE: must sort before letters & numbers
+
+ //Arguably we could simply use a BytesRef, using an extra Object.
+ protected byte[] bytes;//generally bigger to potentially hold a leaf
+ protected int b_off;
+ protected int b_len;//doesn't reflect leaf; same as getLevel()
+
+ protected boolean isLeaf;
+
+ /**
+ * When set via getSubCells(filter), it is the relationship between this cell
+ * and the given shape filter. Doesn't participate in shape equality.
+ */
+ protected SpatialRelation shapeRel;
+
+ protected Shape shape;//cached
+
+ /** Warning: Refers to the same bytes (no copy). If {@link #setLeaf()} is subsequently called then it
+ * may modify bytes. */
+ protected LegacyCell(byte[] bytes, int off, int len) {
+ this.bytes = bytes;
+ this.b_off = off;
+ this.b_len = len;
+ readLeafAdjust();
+ }
+
+ protected void readCell(BytesRef bytes) {
+ shapeRel = null;
+ shape = null;
+ this.bytes = bytes.bytes;
+ this.b_off = bytes.offset;
+ this.b_len = (short) bytes.length;
+ readLeafAdjust();
+ }
+
+ protected void readLeafAdjust() {
+ isLeaf = (b_len > 0 && bytes[b_off + b_len - 1] == LEAF_BYTE);
+ if (isLeaf)
+ b_len--;
+ if (getLevel() == getMaxLevels())
+ isLeaf = true;
+ }
+
+ protected abstract SpatialPrefixTree getGrid();
+
+ protected abstract int getMaxLevels();
+
+ @Override
+ public SpatialRelation getShapeRel() {
+ return shapeRel;
+ }
+
+ @Override
+ public void setShapeRel(SpatialRelation rel) {
+ this.shapeRel = rel;
+ }
+
+ @Override
+ public boolean isLeaf() {
+ return isLeaf;
+ }
+
+ @Override
+ public void setLeaf() {
+ isLeaf = true;
+ }
+
+ @Override
+ public BytesRef getTokenBytesWithLeaf(BytesRef result) {
+ result = getTokenBytesNoLeaf(result);
+ if (!isLeaf || getLevel() == getMaxLevels())
+ return result;
+ if (result.bytes.length < result.offset + result.length + 1) {
+ assert false : "Not supposed to happen; performance bug";
+ byte[] copy = new byte[result.length + 1];
+ System.arraycopy(result.bytes, result.offset, copy, 0, result.length - 1);
+ result.bytes = copy;
+ result.offset = 0;
+ }
+ result.bytes[result.offset + result.length++] = LEAF_BYTE;
+ return result;
+ }
+
+ @Override
+ public BytesRef getTokenBytesNoLeaf(BytesRef result) {
+ if (result == null)
+ return new BytesRef(bytes, b_off, b_len);
+ result.bytes = bytes;
+ result.offset = b_off;
+ result.length = b_len;
+ return result;
+ }
+
+ @Override
+ public int getLevel() {
+ return b_len;
+ }
+
+ @Override
+ public CellIterator getNextLevelCells(Shape shapeFilter) {
+ assert getLevel() < getGrid().getMaxLevels();
+ if (shapeFilter instanceof Point) {
+ LegacyCell cell = getSubCell((Point) shapeFilter);
+ cell.shapeRel = SpatialRelation.CONTAINS;
+ return new SingletonCellIterator(cell);
+ } else {
+ return new FilterCellIterator(getSubCells().iterator(), shapeFilter);
+ }
+ }
+
+ /**
+ * Performant implementations are expected to implement this efficiently by
+ * considering the current cell's boundary.
+ * <p>
+ * Precondition: Never called when getLevel() == maxLevel.
+ * Precondition: this.getShape().relate(p) != DISJOINT.
+ */
+ protected abstract LegacyCell getSubCell(Point p);
+
+ /**
+ * Gets the cells at the next grid cell level that covers this cell.
+ * Precondition: Never called when getLevel() == maxLevel.
+ *
+ * @return A set of cells (no dups), sorted, modifiable, not empty, not null.
+ */
+ protected abstract Collection<Cell> getSubCells();
+
+ /**
+ * {@link #getSubCells()}.size() -- usually a constant. Should be >=2
+ */
+ public abstract int getSubCellsSize();
+
+ @Override
+ public boolean isPrefixOf(Cell c) {
+ //Note: this only works when each level uses a whole number of bytes.
+ LegacyCell cell = (LegacyCell)c;
+ boolean result = sliceEquals(cell.bytes, cell.b_off, cell.b_len, bytes, b_off, b_len);
+ assert result == StringHelper.startsWith(c.getTokenBytesNoLeaf(null), getTokenBytesNoLeaf(null));
+ return result;
+ }
+
+ /** Copied from {@link org.apache.lucene.util.StringHelper#startsWith(BytesRef, BytesRef)}
+ * which calls this. This is to avoid creating a BytesRef. */
+ private static boolean sliceEquals(byte[] sliceToTest_bytes, int sliceToTest_offset, int sliceToTest_length,
+ byte[] other_bytes, int other_offset, int other_length) {
+ if (sliceToTest_length < other_length) {
+ return false;
+ }
+ int i = sliceToTest_offset;
+ int j = other_offset;
+ final int k = other_offset + other_length;
+
+ while (j < k) {
+ if (sliceToTest_bytes[i++] != other_bytes[j++]) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ @Override
+ public int compareToNoLeaf(Cell fromCell) {
+ LegacyCell b = (LegacyCell) fromCell;
+ return compare(bytes, b_off, b_len, b.bytes, b.b_off, b.b_len);
+ }
+
+ /** Copied from {@link BytesRef#compareTo(BytesRef)}.
+ * This is to avoid creating a BytesRef. */
+ protected static int compare(byte[] aBytes, int aUpto, int a_length, byte[] bBytes, int bUpto, int b_length) {
+ final int aStop = aUpto + Math.min(a_length, b_length);
+ while(aUpto < aStop) {
+ int aByte = aBytes[aUpto++] & 0xff;
+ int bByte = bBytes[bUpto++] & 0xff;
+
+ int diff = aByte - bByte;
+ if (diff != 0) {
+ return diff;
+ }
+ }
+
+ // One is a prefix of the other, or, they are equal:
+ return a_length - b_length;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ //this method isn't "normally" called; just in asserts/tests
+ if (obj instanceof Cell) {
+ Cell cell = (Cell) obj;
+ return getTokenBytesWithLeaf(null).equals(cell.getTokenBytesWithLeaf(null));
+ } else {
+ return false;
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ return getTokenBytesWithLeaf(null).hashCode();
+ }
+
+ @Override
+ public String toString() {
+ //this method isn't "normally" called; just in asserts/tests
+ return getTokenBytesWithLeaf(null).utf8ToString();
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/89db4950/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/LegacyPrefixTree.java
----------------------------------------------------------------------
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/LegacyPrefixTree.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/LegacyPrefixTree.java
new file mode 100644
index 0000000..672c2fe
--- /dev/null
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/LegacyPrefixTree.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.spatial.prefix.tree;
+
+import java.util.Arrays;
+
+import com.spatial4j.core.context.SpatialContext;
+import com.spatial4j.core.shape.Point;
+import com.spatial4j.core.shape.Rectangle;
+import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.util.BytesRef;
+
+/** The base for the original two SPT's: Geohash and Quad. Don't subclass this for new SPTs.
+ * @lucene.internal */
+abstract class LegacyPrefixTree extends SpatialPrefixTree {
+ public LegacyPrefixTree(SpatialContext ctx, int maxLevels) {
+ super(ctx, maxLevels);
+ }
+
+ public double getDistanceForLevel(int level) {
+ if (level < 1 || level > getMaxLevels())
+ throw new IllegalArgumentException("Level must be in 1 to maxLevels range");
+ //TODO cache for each level
+ Cell cell = getCell(ctx.getWorldBounds().getCenter(), level);
+ Rectangle bbox = cell.getShape().getBoundingBox();
+ double width = bbox.getWidth();
+ double height = bbox.getHeight();
+ //Use standard cartesian hypotenuse. For geospatial, this answer is larger
+ // than the correct one but it's okay to over-estimate.
+ return Math.sqrt(width * width + height * height);
+ }
+
+ /**
+ * Returns the cell containing point {@code p} at the specified {@code level}.
+ */
+ protected abstract Cell getCell(Point p, int level);
+
+ @Override
+ public Cell readCell(BytesRef term, Cell scratch) {
+ LegacyCell cell = (LegacyCell) scratch;
+ if (cell == null)
+ cell = (LegacyCell) getWorldCell();
+ cell.readCell(term);
+ return cell;
+ }
+
+ @Override
+ public CellIterator getTreeCellIterator(Shape shape, int detailLevel) {
+ if (!(shape instanceof Point))
+ return super.getTreeCellIterator(shape, detailLevel);
+
+ //This specialization is here because the legacy implementations don't have a fast implementation of
+ // cell.getSubCells(point). It's fastest here to encode the full bytes for detailLevel, and create
+ // subcells from the bytesRef in a loop. This avoids an O(N^2) encode, and we have O(N) instead.
+
+ Cell cell = getCell((Point) shape, detailLevel);
+ assert cell instanceof LegacyCell;
+ BytesRef fullBytes = cell.getTokenBytesNoLeaf(null);
+ //fill in reverse order to be sorted
+ Cell[] cells = new Cell[detailLevel];
+ for (int i = 1; i < detailLevel; i++) {
+ fullBytes.length = i;
+ Cell parentCell = readCell(fullBytes, null);
+ cells[i-1] = parentCell;
+ }
+ cells[detailLevel-1] = cell;
+ return new FilterCellIterator(Arrays.asList(cells).iterator(), null);//null filter
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/89db4950/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/NumberRangePrefixTree.java
----------------------------------------------------------------------
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/NumberRangePrefixTree.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/NumberRangePrefixTree.java
new file mode 100644
index 0000000..40e80bc
--- /dev/null
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/tree/NumberRangePrefixTree.java
@@ -0,0 +1,989 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.spatial.prefix.tree;
+
+import java.text.ParseException;
+
+import com.spatial4j.core.context.SpatialContext;
+import com.spatial4j.core.context.SpatialContextFactory;
+import com.spatial4j.core.shape.Point;
+import com.spatial4j.core.shape.Rectangle;
+import com.spatial4j.core.shape.Shape;
+import com.spatial4j.core.shape.SpatialRelation;
+import com.spatial4j.core.shape.impl.RectangleImpl;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.StringHelper;
+
+/**
+ * A SpatialPrefixTree for single-dimensional numbers and number ranges of fixed precision values (not floating point).
+ * Despite its name, the indexed values (and queries) need not actually be ranges, they can be unit instance/values.
+ * <p>
+ * Why might you use this instead of Lucene's built-in integer/long support? Here are some reasons with features based
+ * on code in this class, <em>or are possible based on this class but require a subclass to fully realize it</em>.
+ * <ul>
+ * <li>Index ranges, not just unit instances. This is especially useful when the requirement calls for a
+ * multi-valued range.</li>
+ * <li>Instead of a fixed "precisionStep", this prefixTree can have a customizable number of child values for any
+ * prefix (up to 32768). This allows exact alignment of the prefix-tree with typical/expected values, which
+ * results in better performance. For example in a Date implementation, every month can get its own dedicated prefix,
+ * every day, etc., even though months vary in duration.</li>
+ * <li>Arbitrary precision, like {@link java.math.BigDecimal}.</li>
+ * <li>Standard Lucene integer/long indexing always indexes the full precision of those data types but this one
+ * is customizable.</li>
+ * </ul>
+ *
+ * Unlike "normal" spatial components in this module, this special-purpose one only works with {@link Shape}s
+ * created by the methods on this class, not from any {@link com.spatial4j.core.context.SpatialContext}.
+ *
+ * @see org.apache.lucene.spatial.prefix.NumberRangePrefixTreeStrategy
+ * @see <a href="https://issues.apache.org/jira/browse/LUCENE-5648">LUCENE-5648</a>
+ * @lucene.experimental
+ */
+public abstract class NumberRangePrefixTree extends SpatialPrefixTree {
+
+ //
+ // Dummy SpatialContext
+ //
+
+ private static final SpatialContext DUMMY_CTX;
+ static {
+ SpatialContextFactory factory = new SpatialContextFactory();
+ factory.geo = false;
+ factory.worldBounds = new RectangleImpl(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0L, 0L, null);
+ DUMMY_CTX = factory.newSpatialContext();
+ }
+
+ /** Base interface for {@link Shape}s this prefix tree supports. It extends {@link Shape} (Spatial4j) for compatibility
+ * with the spatial API even though it doesn't intermix with conventional 2D shapes.
+ * @lucene.experimental
+ */
+ public static interface NRShape extends Shape, Cloneable {
+ /** The result should be parseable by {@link #parseShape(String)}. */
+ abstract String toString();
+
+ /** Returns this shape rounded to the target level. If we are already more course than the level then the shape is
+ * simply returned. The result may refer to internal state of the argument so you may want to clone it.
+ */
+ public NRShape roundToLevel(int targetLevel);
+ }
+
+ //
+ // Factory / Conversions / parsing relating to NRShapes
+ //
+
+ /** Converts the value to a unit shape. Doesn't parse strings; see {@link #parseShape(String)} for
+ * that. This is the reverse of {@link #toObject(org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape)}. */
+ public abstract UnitNRShape toUnitShape(Object value);
+
+ /** Returns a shape that represents the continuous range between {@code start} and {@code end}. It will
+ * be normalized, and so sometimes a {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape}
+ * will be returned, other times a
+ * {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.SpanUnitsNRShape} will be.
+ *
+ * @throws IllegalArgumentException if the arguments are in the wrong order, or if either contains the other (yet they
+ * aren't equal).
+ */
+ public NRShape toRangeShape(UnitNRShape startUnit, UnitNRShape endUnit) {
+ //note: this normalization/optimization process is actually REQUIRED based on assumptions elsewhere.
+ //Normalize start & end
+ startUnit = startUnit.getShapeAtLevel(truncateStartVals(startUnit, 0)); // chops off trailing min-vals (zeroes)
+ endUnit = endUnit.getShapeAtLevel(truncateEndVals(endUnit, 0)); // chops off trailing max-vals
+ //Optimize to just start or end if it's equivalent, e.g. April to April 1st is April 1st.
+ int cmp = comparePrefix(startUnit, endUnit);
+ if (cmp > 0) {
+ throw new IllegalArgumentException("Wrong order: "+ startUnit +" TO "+ endUnit);
+ }
+ if (cmp == 0) {//one is a prefix of the other
+ if (startUnit.getLevel() == endUnit.getLevel()) {
+ //same
+ return startUnit;
+ } else if (endUnit.getLevel() > startUnit.getLevel()) {
+ // e.g. April to April 1st
+ if (truncateStartVals(endUnit, startUnit.getLevel()) == startUnit.getLevel()) {
+ return endUnit;
+ }
+ } else {//minLV level > maxLV level
+ // e.g. April 30 to April
+ if (truncateEndVals(startUnit, endUnit.getLevel()) == endUnit.getLevel()) {
+ return startUnit;
+ }
+ }
+ }
+ return new SpanUnitsNRShape(startUnit, endUnit);
+ }
+
+ /** From lv.getLevel on up, it returns the first Level seen with val != 0. It doesn't check past endLevel. */
+ private int truncateStartVals(UnitNRShape lv, int endLevel) {
+ for (int level = lv.getLevel(); level > endLevel; level--) {
+ if (lv.getValAtLevel(level) != 0)
+ return level;
+ }
+ return endLevel;
+ }
+
+ private int truncateEndVals(UnitNRShape lv, int endLevel) {
+ for (int level = lv.getLevel(); level > endLevel; level--) {
+ int max = getNumSubCells(lv.getShapeAtLevel(level - 1)) - 1;
+ if (lv.getValAtLevel(level) != max)
+ return level;
+ }
+ return endLevel;
+ }
+
+ /** Converts a UnitNRShape shape to the corresponding type supported by this class, such as a Calendar/BigDecimal.
+ * This is the reverse of {@link #toUnitShape(Object)}.
+ */
+ public abstract Object toObject(UnitNRShape shape);
+
+ /** A string representation of the UnitNRShape that is parse-able by {@link #parseUnitShape(String)}. */
+ protected abstract String toString(UnitNRShape lv);
+
+ protected static String toStringUnitRaw(UnitNRShape lv) {
+ StringBuilder buf = new StringBuilder(100);
+ buf.append('[');
+ for (int level = 1; level <= lv.getLevel(); level++) {
+ buf.append(lv.getValAtLevel(level)).append(',');
+ }
+ buf.setLength(buf.length()-1);//chop off ','
+ buf.append(']');
+ return buf.toString();
+ }
+
+ /** Detects a range pattern and parses it, otherwise it's parsed as one shape via
+ * {@link #parseUnitShape(String)}. The range pattern looks like this BNF:
+ * <pre>
+ * '[' + parseShapeLV + ' TO ' + parseShapeLV + ']'
+ * </pre>
+ * It's the same thing as the toString() of the range shape, notwithstanding range optimization.
+ *
+ * @param str not null or empty
+ * @return not null
+ * @throws java.text.ParseException If there is a problem
+ */
+ public NRShape parseShape(String str) throws ParseException {
+ if (str == null || str.isEmpty())
+ throw new IllegalArgumentException("str is null or blank");
+ if (str.charAt(0) == '[') {
+ if (str.charAt(str.length()-1) != ']')
+ throw new ParseException("If starts with [ must end with ]; got "+str, str.length()-1);
+ int middle = str.indexOf(" TO ");
+ if (middle < 0)
+ throw new ParseException("If starts with [ must contain ' TO '; got "+str, -1);
+ String leftStr = str.substring(1, middle);
+ String rightStr = str.substring(middle + " TO ".length(), str.length()-1);
+ return toRangeShape(parseUnitShape(leftStr), parseUnitShape(rightStr));
+ } else if (str.charAt(0) == '{') {
+ throw new ParseException("Exclusive ranges not supported; got "+str, 0);
+ } else {
+ return parseUnitShape(str);
+ }
+ }
+
+ /** Parse a String to a UnitNRShape. "*" should be the full-range (level 0 shape). */
+ protected abstract UnitNRShape parseUnitShape(String str) throws ParseException;
+
+
+ //
+ // UnitNRShape
+ //
+
+ /**
+ * A unit value Shape implemented as a stack of numbers, one for each level in the prefix tree. It directly
+ * corresponds to a {@link Cell}. Spatially speaking, it's analogous to a Point but 1D and has some precision width.
+ * @lucene.experimental
+ */
+ public static interface UnitNRShape extends NRShape, Comparable<UnitNRShape> {
+ //note: formerly known as LevelledValue; thus some variables still use 'lv'
+
+ /** Get the prefix tree level, the higher the more precise. 0 means the world (universe). */
+ int getLevel();
+ /** Gets the value at the specified level of this unit. level must be >= 0 and <= getLevel(). */
+ int getValAtLevel(int level);
+ /** Gets an ancestor at the specified level. It shares state, so you may want to clone() it. */
+ UnitNRShape getShapeAtLevel(int level);
+ @Override
+ UnitNRShape roundToLevel(int targetLevel);
+
+ /** Deep clone */
+ UnitNRShape clone();
+ }
+
+ /** Compares a to b, returning less than 0, 0, or greater than 0, if a is less than, equal to, or
+ * greater than b, respectively, up to their common prefix (i.e. only min(a.levels,b.levels) are compared).
+ * @lucene.internal */
+ protected static int comparePrefix(UnitNRShape a, UnitNRShape b) {
+ int minLevel = Math.min(a.getLevel(), b.getLevel());
+ for (int level = 1; level <= minLevel; level++) {
+ int diff = a.getValAtLevel(level) - b.getValAtLevel(level);
+ if (diff != 0)
+ return diff;
+ }
+ return 0;
+ }
+
+
+ //
+ // SpanUnitsNRShape
+ //
+
+ /** A range Shape; based on a pair of {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape}.
+ * Spatially speaking, it's analogous to a Rectangle but 1D. It might have been named with Range in the name but it
+ * may be confusing since even the {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape}
+ * is in some sense a range.
+ * @lucene.experimental */
+ public class SpanUnitsNRShape implements NRShape {
+
+ private final UnitNRShape minLV, maxLV;
+ private final int lastLevelInCommon;//computed; not part of identity
+
+ /** Don't call directly; see
+ * {@link #toRangeShape(org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape, org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape)}. */
+ private SpanUnitsNRShape(UnitNRShape minLV, UnitNRShape maxLV) {
+ this.minLV = minLV;
+ this.maxLV = maxLV;
+
+ //calc lastLevelInCommon
+ int level = 1;
+ for (; level <= minLV.getLevel() && level <= maxLV.getLevel(); level++) {
+ if (minLV.getValAtLevel(level) != maxLV.getValAtLevel(level))
+ break;
+ }
+ lastLevelInCommon = level - 1;
+ }
+
+ @Override
+ public SpatialContext getContext() {
+ return DUMMY_CTX;
+ }
+
+ public UnitNRShape getMinUnit() { return minLV; }
+
+ public UnitNRShape getMaxUnit() { return maxLV; }
+
+ /** How many levels are in common between minUnit and maxUnit, not including level 0. */
+ private int getLevelsInCommon() { return lastLevelInCommon; }
+
+ @Override
+ public NRShape roundToLevel(int targetLevel) {
+ return toRangeShape(minLV.roundToLevel(targetLevel), maxLV.roundToLevel(targetLevel));
+ }
+
+ @Override
+ public SpatialRelation relate(Shape shape) {
+// if (shape instanceof UnitNRShape)
+// return relate((UnitNRShape)shape);
+ if (shape instanceof SpanUnitsNRShape)
+ return relate((SpanUnitsNRShape) shape);
+ return shape.relate(this).transpose();//probably a UnitNRShape
+ }
+
+ public SpatialRelation relate(SpanUnitsNRShape ext) {
+ //This logic somewhat mirrors RectangleImpl.relate_range()
+ int extMin_intMax = comparePrefix(ext.getMinUnit(), getMaxUnit());
+ if (extMin_intMax > 0)
+ return SpatialRelation.DISJOINT;
+ int extMax_intMin = comparePrefix(ext.getMaxUnit(), getMinUnit());
+ if (extMax_intMin < 0)
+ return SpatialRelation.DISJOINT;
+ int extMin_intMin = comparePrefix(ext.getMinUnit(), getMinUnit());
+ int extMax_intMax = comparePrefix(ext.getMaxUnit(), getMaxUnit());
+ if ((extMin_intMin > 0 || extMin_intMin == 0 && ext.getMinUnit().getLevel() >= getMinUnit().getLevel())
+ && (extMax_intMax < 0 || extMax_intMax == 0 && ext.getMaxUnit().getLevel() >= getMaxUnit().getLevel()))
+ return SpatialRelation.CONTAINS;
+ if ((extMin_intMin < 0 || extMin_intMin == 0 && ext.getMinUnit().getLevel() <= getMinUnit().getLevel())
+ && (extMax_intMax > 0 || extMax_intMax == 0 && ext.getMaxUnit().getLevel() <= getMaxUnit().getLevel()))
+ return SpatialRelation.WITHIN;
+ return SpatialRelation.INTERSECTS;
+ }
+
+ @Override
+ public Rectangle getBoundingBox() { throw new UnsupportedOperationException(); }
+
+ @Override
+ public boolean hasArea() { return true; }
+
+ @Override
+ public double getArea(SpatialContext spatialContext) { throw new UnsupportedOperationException(); }
+
+ @Override
+ public Point getCenter() { throw new UnsupportedOperationException(); }
+
+ @Override
+ public Shape getBuffered(double v, SpatialContext spatialContext) { throw new UnsupportedOperationException(); }
+
+ @Override
+ public boolean isEmpty() { return false; }
+
+ /** A deep clone. */
+ @Override
+ public SpanUnitsNRShape clone() {
+ return new SpanUnitsNRShape(minLV.clone(), maxLV.clone());
+ }
+
+ @Override
+ public String toString() {
+ return "[" + NumberRangePrefixTree.this.toString(minLV) + " TO "
+ + NumberRangePrefixTree.this.toString(maxLV) + "]";
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ SpanUnitsNRShape spanShape = (SpanUnitsNRShape) o;
+
+ if (!maxLV.equals(spanShape.maxLV)) return false;
+ if (!minLV.equals(spanShape.minLV)) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = minLV.hashCode();
+ result = 31 * result + maxLV.hashCode();
+ return result;
+ }
+ }// class SpanUnitsNRShape
+
+ //
+ // NumberRangePrefixTree
+ //
+
+ protected final int[] maxSubCellsByLevel;
+ protected final int[] termLenByLevel;
+ protected final int[] levelByTermLen;
+ protected final int maxTermLen; // how long could cell.getToken... (that is a leaf) possibly be?
+
+ protected NumberRangePrefixTree(int[] maxSubCellsByLevel) {
+ super(DUMMY_CTX, maxSubCellsByLevel.length);
+ this.maxSubCellsByLevel = maxSubCellsByLevel;
+
+ // Fill termLenByLevel
+ this.termLenByLevel = new int[maxLevels + 1];
+ termLenByLevel[0] = 0;
+ final int MAX_STATES = 1 << 15;//1 bit less than 2 bytes
+ for (int level = 1; level <= maxLevels; level++) {
+ final int states = maxSubCellsByLevel[level - 1];
+ if (states >= MAX_STATES || states <= 1) {
+ throw new IllegalArgumentException("Max states is "+MAX_STATES+", given "+states+" at level "+level);
+ }
+ boolean twoBytes = states >= 256;
+ termLenByLevel[level] = termLenByLevel[level-1] + (twoBytes ? 2 : 1);
+ }
+ maxTermLen = termLenByLevel[maxLevels] + 1;// + 1 for leaf byte
+
+ // Fill levelByTermLen
+ levelByTermLen = new int[maxTermLen];
+ levelByTermLen[0] = 0;
+ for (int level = 1; level < termLenByLevel.length; level++) {
+ int termLen = termLenByLevel[level];
+ int prevTermLen = termLenByLevel[level-1];
+ if (termLen - prevTermLen == 2) {//2 byte delta
+ //if the term doesn't completely cover this cell then it must be a leaf of the prior.
+ levelByTermLen[termLen-1] = -1;//won't be used; otherwise erroneous
+ levelByTermLen[termLen] = level;
+ } else {//1 byte delta
+ assert termLen - prevTermLen == 1;
+ levelByTermLen[termLen] = level;
+ }
+ }
+
+ }
+
+ @Override
+ public String toString() {
+ return getClass().getSimpleName();
+ }
+
+ @Override
+ public int getLevelForDistance(double dist) {
+ //note: it might be useful to compute which level has a raw width (counted in
+ // bottom units, e.g. milliseconds), that covers the provided dist in those units?
+ return maxLevels; // thus always use full precision. We don't do approximations in this tree/strategy.
+ //throw new UnsupportedOperationException("Not applicable.");
+ }
+
+ @Override
+ public double getDistanceForLevel(int level) {
+ //note: we could compute this... should we?
+ throw new UnsupportedOperationException("Not applicable.");
+ }
+
+ protected UnitNRShape toShape(int[] valStack, int len) {
+ final NRCell[] cellStack = newCellStack(len);
+ for (int i = 0; i < len; i++) {
+ cellStack[i+1].resetCellWithCellNum(valStack[i]);
+ }
+ return cellStack[len];
+ }
+
+ @Override
+ public Cell getWorldCell() {
+ return newCellStack(maxLevels)[0];
+ }
+
+ protected NRCell[] newCellStack(int levels) {
+ final NRCell[] cellsByLevel = new NRCell[levels + 1];
+ final BytesRef term = new BytesRef(maxTermLen);
+ for (int level = 0; level <= levels; level++) {
+ cellsByLevel[level] = new NRCell(cellsByLevel,term,level);
+ }
+ return cellsByLevel;
+ }
+
+ @Override
+ public Cell readCell(BytesRef term, Cell scratch) {
+ if (scratch == null)
+ scratch = getWorldCell();
+
+ //We decode level #, leaf boolean, and populate bytes by reference. We don't decode the stack.
+
+ //reverse lookup term length to the level and hence the cell
+ NRCell[] cellsByLevel = ((NRCell) scratch).cellsByLevel;
+ boolean isLeaf = term.bytes[term.offset + term.length - 1] == 0;
+ int lenNoLeaf = isLeaf ? term.length - 1 : term.length;
+
+ NRCell result = cellsByLevel[levelByTermLen[lenNoLeaf]];
+ if (cellsByLevel[0].termBuf == null)
+ cellsByLevel[0].termBuf = result.term.bytes;//a kluge; see cell.ensureOwnTermBytes()
+ result.term.bytes = term.bytes;
+ result.term.offset = term.offset;
+ result.term.length = lenNoLeaf;//technically this isn't used but may help debugging
+ result.reset();
+ if (isLeaf)
+ result.setLeaf();
+
+ result.cellNumber = -1;//lazy decode flag
+
+ return result;
+ }
+
+ /** Returns the number of sub-cells beneath the given UnitNRShape. */
+ public int getNumSubCells(UnitNRShape lv) {
+ return maxSubCellsByLevel[lv.getLevel()];
+ }
+
+ //
+ // NRCell
+ //
+
+ /** Most of the PrefixTree implementation is in this one class, which is both
+ * the Cell, the CellIterator, and the Shape to reduce object allocation. It's implemented as a re-used array/stack
+ * of Cells at adjacent levels, that all have a reference back to the cell array to traverse. They also share a common
+ * BytesRef for the term.
+ * @lucene.internal */
+ protected class NRCell extends CellIterator implements Cell, UnitNRShape {
+
+ //Shared: (TODO put this in a new class)
+ final NRCell[] cellsByLevel;
+ final BytesRef term;//AKA the token
+ byte[] termBuf;// see ensureOwnTermBytes(), only for cell0
+
+ //Cell state...
+ final int cellLevel; // assert levelStack[cellLevel] == this
+ int cellNumber; //relative to parent cell. It's unused for level 0. Starts at 0.
+
+ SpatialRelation cellShapeRel;
+ boolean cellIsLeaf;
+
+ //CellIterator state is defined further below
+
+ NRCell(NRCell[] cellsByLevel, BytesRef term, int cellLevel) {
+ this.cellsByLevel = cellsByLevel;
+ this.term = term;
+ this.cellLevel = cellLevel;
+ this.cellNumber = cellLevel == 0 ? 0 : -1;
+ this.cellIsLeaf = false;
+ assert cellsByLevel[cellLevel] == null;
+ }
+
+ /** Ensure we own term.bytes so that it's safe to modify. We detect via a kluge in which cellsByLevel[0].termBuf
+ * is non-null, which is a pre-allocated for use to replace term.bytes. */
+ void ensureOwnTermBytes() {
+ NRCell cell0 = cellsByLevel[0];
+ if (cell0.termBuf == null)
+ return;//we already own the bytes
+ System.arraycopy(term.bytes, term.offset, cell0.termBuf, 0, term.length);
+ term.bytes = cell0.termBuf;
+ term.offset = 0;
+ cell0.termBuf = null;
+ }
+
+ private void reset() {
+ this.cellIsLeaf = false;
+ this.cellShapeRel = null;
+ }
+
+ private void resetCellWithCellNum(int cellNumber) {
+ reset();
+
+ //update bytes
+ // note: see lazyInitCellNumsFromBytes() for the reverse
+ if (cellNumber >= 0) {//valid
+ ensureOwnTermBytes();
+ int termLen = termLenByLevel[getLevel()];
+ boolean twoBytes = (termLen - termLenByLevel[getLevel()-1]) > 1;
+ if (twoBytes) {
+ //right 7 bits, plus 1 (may overflow to 8th bit which is okay)
+ term.bytes[termLen-2] = (byte) (cellNumber >> 7);
+ term.bytes[termLen-1] = (byte) ((cellNumber & 0x7F) + 1);
+ } else {
+ term.bytes[termLen-1] = (byte) (cellNumber+1);
+ }
+ assert term.bytes[termLen-1] != 0;
+ term.length = termLen;
+ }
+ this.cellNumber = cellNumber;
+ }
+
+ private void ensureDecoded() {
+ if (cellNumber >= 0)
+ return;
+ //Decode cell numbers from bytes. This is the inverse of resetCellWithCellNum().
+ for (int level = 1; level <= getLevel(); level++) {
+ NRCell cell = cellsByLevel[level];
+ int termLen = termLenByLevel[level];
+ boolean twoBytes = (termLen - termLenByLevel[level-1]) > 1;
+ if (twoBytes) {
+ int byteH = (term.bytes[term.offset + termLen - 2] & 0xFF);
+ int byteL = (term.bytes[term.offset + termLen - 1] & 0xFF);
+ assert byteL - 1 < (1<<7);
+ cell.cellNumber = (byteH << 7) + (byteL-1);
+ assert cell.cellNumber < 1<<15;
+ } else {
+ cell.cellNumber = (term.bytes[term.offset + termLen - 1] & 0xFF) - 1;
+ assert cell.cellNumber < 255;
+ }
+ cell.assertDecoded();
+ }
+ }
+
+ private void assertDecoded() {
+ assert cellNumber >= 0 : "Illegal state; ensureDecoded() wasn't called";
+ }
+
+ @Override // for Cell & for UnitNRShape
+ public int getLevel() {
+ return cellLevel;
+ }
+
+ @Override
+ public SpatialRelation getShapeRel() {
+ return cellShapeRel;
+ }
+
+ @Override
+ public void setShapeRel(SpatialRelation rel) {
+ cellShapeRel = rel;
+ }
+
+ @Override
+ public boolean isLeaf() {
+ return cellIsLeaf;
+ }
+
+ @Override
+ public void setLeaf() {
+ cellIsLeaf = true;
+ }
+
+ @Override
+ public UnitNRShape getShape() {
+ ensureDecoded();
+ return this;
+ }
+
+ @Override
+ public BytesRef getTokenBytesNoLeaf(BytesRef result) {
+ if (result == null)
+ result = new BytesRef();
+ result.bytes = term.bytes;
+ result.offset = term.offset;
+ result.length = termLenByLevel[cellLevel];
+ assert result.length <= term.length;
+ return result;
+ }
+
+ @Override
+ public BytesRef getTokenBytesWithLeaf(BytesRef result) {
+ ensureOwnTermBytes();//normally shouldn't do anything
+ result = getTokenBytesNoLeaf(result);
+ if (isLeaf()) {
+ result.bytes[result.length++] = 0;
+ }
+ return result;
+ }
+
+ @Override
+ public boolean isPrefixOf(Cell c) {
+ NRCell otherCell = (NRCell) c;
+ assert term != otherCell.term;
+ //trick to re-use bytesref; provided that we re-instate it
+ int myLastLen = term.length;
+ term.length = termLenByLevel[getLevel()];
+ int otherLastLen = otherCell.term.length;
+ otherCell.term.length = termLenByLevel[otherCell.getLevel()];
+ boolean answer = StringHelper.startsWith(otherCell.term, term);
+ term.length = myLastLen;
+ otherCell.term.length = otherLastLen;
+ return answer;
+ }
+
+ @Override
+ public int compareToNoLeaf(Cell fromCell) {
+ final NRCell nrCell = (NRCell) fromCell;
+ assert term != nrCell.term;
+ //trick to re-use bytesref; provided that we re-instate it
+ int myLastLen = term.length;
+ int otherLastLen = nrCell.term.length;
+ term.length = termLenByLevel[getLevel()];
+ nrCell.term.length = termLenByLevel[nrCell.getLevel()];
+ int answer = term.compareTo(nrCell.term);
+ term.length = myLastLen;
+ nrCell.term.length = otherLastLen;
+ return answer;
+ }
+
+ @Override
+ public CellIterator getNextLevelCells(Shape shapeFilter) {
+ ensureDecoded();
+ NRCell subCell = cellsByLevel[cellLevel + 1];
+ subCell.initIter(shapeFilter);
+ return subCell;
+ }
+
+ //----------- CellIterator
+
+ Shape iterFilter;//UnitNRShape or NRShape
+ boolean iterFirstIsIntersects;
+ boolean iterLastIsIntersects;
+ int iterFirstCellNumber;
+ int iterLastCellNumber;
+
+ private void initIter(Shape filter) {
+ cellNumber = -1;
+ if (filter instanceof UnitNRShape && ((UnitNRShape) filter).getLevel() == 0)
+ filter = null;//world means everything -- no filter
+ iterFilter = filter;
+
+ NRCell parent = getShapeAtLevel(getLevel() - 1);
+
+ // Initialize iter* members.
+
+ //no filter means all subcells
+ if (filter == null) {
+ iterFirstCellNumber = 0;
+ iterFirstIsIntersects = false;
+ iterLastCellNumber = getNumSubCells(parent) - 1;
+ iterLastIsIntersects = false;
+ return;
+ }
+
+ final UnitNRShape minLV;
+ final UnitNRShape maxLV;
+ final int lastLevelInCommon;//between minLV & maxLV
+ if (filter instanceof SpanUnitsNRShape) {
+ SpanUnitsNRShape spanShape = (SpanUnitsNRShape) iterFilter;
+ minLV = spanShape.getMinUnit();
+ maxLV = spanShape.getMaxUnit();
+ lastLevelInCommon = spanShape.getLevelsInCommon();
+ } else {
+ minLV = (UnitNRShape) iterFilter;
+ maxLV = minLV;
+ lastLevelInCommon = minLV.getLevel();
+ }
+
+ //fast path optimization that is usually true, but never first level
+ if (iterFilter == parent.iterFilter &&
+ (getLevel() <= lastLevelInCommon || parent.iterFirstCellNumber != parent.iterLastCellNumber)) {
+ //TODO benchmark if this optimization pays off. We avoid two comparePrefixLV calls.
+ if (parent.iterFirstIsIntersects && parent.cellNumber == parent.iterFirstCellNumber
+ && minLV.getLevel() >= getLevel()) {
+ iterFirstCellNumber = minLV.getValAtLevel(getLevel());
+ iterFirstIsIntersects = (minLV.getLevel() > getLevel());
+ } else {
+ iterFirstCellNumber = 0;
+ iterFirstIsIntersects = false;
+ }
+ if (parent.iterLastIsIntersects && parent.cellNumber == parent.iterLastCellNumber
+ && maxLV.getLevel() >= getLevel()) {
+ iterLastCellNumber = maxLV.getValAtLevel(getLevel());
+ iterLastIsIntersects = (maxLV.getLevel() > getLevel());
+ } else {
+ iterLastCellNumber = getNumSubCells(parent) - 1;
+ iterLastIsIntersects = false;
+ }
+ if (iterFirstCellNumber == iterLastCellNumber) {
+ if (iterLastIsIntersects)
+ iterFirstIsIntersects = true;
+ else if (iterFirstIsIntersects)
+ iterLastIsIntersects = true;
+ }
+ return;
+ }
+
+ //not common to get here, except for level 1 which always happens
+
+ int startCmp = comparePrefix(minLV, parent);
+ if (startCmp > 0) {//start comes after this cell
+ iterFirstCellNumber = 0;
+ iterFirstIsIntersects = false;
+ iterLastCellNumber = -1;//so ends early (no cells)
+ iterLastIsIntersects = false;
+ return;
+ }
+ int endCmp = comparePrefix(maxLV, parent);//compare to end cell
+ if (endCmp < 0) {//end comes before this cell
+ iterFirstCellNumber = 0;
+ iterFirstIsIntersects = false;
+ iterLastCellNumber = -1;//so ends early (no cells)
+ iterLastIsIntersects = false;
+ return;
+ }
+ if (startCmp < 0 || minLV.getLevel() < getLevel()) {
+ //start comes before...
+ iterFirstCellNumber = 0;
+ iterFirstIsIntersects = false;
+ } else {
+ iterFirstCellNumber = minLV.getValAtLevel(getLevel());
+ iterFirstIsIntersects = (minLV.getLevel() > getLevel());
+ }
+ if (endCmp > 0 || maxLV.getLevel() < getLevel()) {
+ //end comes after...
+ iterLastCellNumber = getNumSubCells(parent) - 1;
+ iterLastIsIntersects = false;
+ } else {
+ iterLastCellNumber = maxLV.getValAtLevel(getLevel());
+ iterLastIsIntersects = (maxLV.getLevel() > getLevel());
+ }
+ if (iterFirstCellNumber == iterLastCellNumber) {
+ if (iterLastIsIntersects)
+ iterFirstIsIntersects = true;
+ else if (iterFirstIsIntersects)
+ iterLastIsIntersects = true;
+ }
+ }
+
+ @Override
+ public boolean hasNext() {
+ thisCell = null;
+ if (nextCell != null)//calling hasNext twice in a row
+ return true;
+
+ if (cellNumber >= iterLastCellNumber)
+ return false;
+
+ resetCellWithCellNum(cellNumber < iterFirstCellNumber ? iterFirstCellNumber : cellNumber + 1);
+
+ boolean hasChildren =
+ (cellNumber == iterFirstCellNumber && iterFirstIsIntersects)
+ || (cellNumber == iterLastCellNumber && iterLastIsIntersects);
+
+ if (!hasChildren) {
+ setLeaf();
+ setShapeRel(SpatialRelation.WITHIN);
+ } else if (iterFirstCellNumber == iterLastCellNumber) {
+ setShapeRel(SpatialRelation.CONTAINS);
+ } else {
+ setShapeRel(SpatialRelation.INTERSECTS);
+ }
+
+ nextCell = this;
+ return true;
+ }
+
+ //TODO override nextFrom to be more efficient
+
+ //----------- UnitNRShape
+
+ @Override
+ public int getValAtLevel(int level) {
+ final int result = cellsByLevel[level].cellNumber;
+ assert result >= 0;//initialized (decoded)
+ return result;
+ }
+
+ @Override
+ public NRCell getShapeAtLevel(int level) {
+ assert level <= cellLevel;
+ return cellsByLevel[level];
+ }
+
+ @Override
+ public UnitNRShape roundToLevel(int targetLevel) {
+ if (getLevel() <= targetLevel) {
+ return this;
+ } else {
+ return getShapeAtLevel(targetLevel);
+ }
+ }
+
+ @Override
+ public SpatialRelation relate(Shape shape) {
+ assertDecoded();
+ if (shape == iterFilter && cellShapeRel != null)
+ return cellShapeRel;
+ if (shape instanceof UnitNRShape)
+ return relate((UnitNRShape)shape);
+ if (shape instanceof SpanUnitsNRShape)
+ return relate((SpanUnitsNRShape)shape);
+ return shape.relate(this).transpose();
+ }
+
+ public SpatialRelation relate(UnitNRShape lv) {
+ assertDecoded();
+ int cmp = comparePrefix(this, lv);
+ if (cmp != 0)
+ return SpatialRelation.DISJOINT;
+ if (getLevel() > lv.getLevel())
+ return SpatialRelation.WITHIN;
+ return SpatialRelation.CONTAINS;//or equals
+ //no INTERSECTS; that won't happen.
+ }
+
+ public SpatialRelation relate(SpanUnitsNRShape spanShape) {
+ assertDecoded();
+ int startCmp = comparePrefix(spanShape.getMinUnit(), this);
+ if (startCmp > 0) {//start comes after this cell
+ return SpatialRelation.DISJOINT;
+ }
+ int endCmp = comparePrefix(spanShape.getMaxUnit(), this);
+ if (endCmp < 0) {//end comes before this cell
+ return SpatialRelation.DISJOINT;
+ }
+ int nrMinLevel = spanShape.getMinUnit().getLevel();
+ int nrMaxLevel = spanShape.getMaxUnit().getLevel();
+ if ((startCmp < 0 || startCmp == 0 && nrMinLevel <= getLevel())
+ && (endCmp > 0 || endCmp == 0 && nrMaxLevel <= getLevel()))
+ return SpatialRelation.WITHIN;//or equals
+ //At this point it's Contains or Within.
+ if (startCmp != 0 || endCmp != 0)
+ return SpatialRelation.INTERSECTS;
+ //if min or max Level is less, it might be on the equivalent edge.
+ for (;nrMinLevel < getLevel(); nrMinLevel++) {
+ if (getValAtLevel(nrMinLevel + 1) != 0)
+ return SpatialRelation.INTERSECTS;
+ }
+ for (;nrMaxLevel < getLevel(); nrMaxLevel++) {
+ if (getValAtLevel(nrMaxLevel + 1) != getNumSubCells(getShapeAtLevel(nrMaxLevel)) - 1)
+ return SpatialRelation.INTERSECTS;
+ }
+ return SpatialRelation.CONTAINS;
+ }
+
+ @Override
+ public UnitNRShape clone() {
+ //no leaf distinction; this is purely based on UnitNRShape
+ NRCell cell = (NRCell) readCell(getTokenBytesNoLeaf(null), null);
+ cell.ensureOwnTermBytes();
+ return cell.getShape();
+ }
+
+ @Override
+ public int compareTo(UnitNRShape o) {
+ assertDecoded();
+ //no leaf distinction; this is purely based on UnitNRShape
+ int cmp = comparePrefix(this, o);
+ if (cmp != 0) {
+ return cmp;
+ } else {
+ return getLevel() - o.getLevel();
+ }
+ }
+
+ @Override
+ public Rectangle getBoundingBox() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean hasArea() {
+ return true;
+ }
+
+ @Override
+ public double getArea(SpatialContext ctx) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Point getCenter() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Shape getBuffered(double distance, SpatialContext ctx) { throw new UnsupportedOperationException(); }
+
+ @Override
+ public boolean isEmpty() {
+ return false;
+ }
+
+ //------- Object
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof NRCell)) {
+ return false;
+ }
+ if (this == obj)
+ return true;
+ NRCell nrCell = (NRCell) obj;
+ assert term != nrCell.term;
+ if (getLevel() != nrCell.getLevel())
+ return false;
+ //trick to re-use bytesref; provided that we re-instate it
+ int myLastLen = term.length;
+ int otherLastLen = nrCell.term.length;
+ boolean answer = getTokenBytesNoLeaf(term).equals(nrCell.getTokenBytesNoLeaf(nrCell.term));
+ term.length = myLastLen;
+ nrCell.term.length = otherLastLen;
+ return answer;
+ }
+
+ @Override
+ public SpatialContext getContext() {
+ return DUMMY_CTX;
+ }
+
+ @Override
+ public int hashCode() {
+ //trick to re-use bytesref; provided that we re-instate it
+ int myLastLen = term.length;
+ int result = getTokenBytesNoLeaf(term).hashCode();
+ term.length = myLastLen;
+ return result;
+ }
+
+ @Override
+ public String toString() {
+ return NumberRangePrefixTree.this.toString(getShape());
+ }
+
+ /** Configure your IDE to use this. */
+ public String toStringDebug() {
+ String pretty = toString();
+ if (getLevel() == 0)
+ return pretty;
+ return toStringUnitRaw(this) + (isLeaf() ? "•" : "") + " " + pretty;
+ }
+
+ } // END OF NRCell
+
+}