Posted to commits@lucene.apache.org by ab...@apache.org on 2018/04/16 17:20:41 UTC

[01/46] lucene-solr:jira/solr-11833: LUCENE-8245: Change how crossings are computed.

Repository: lucene-solr
Updated Branches:
  refs/heads/jira/solr-11833 76461f3bc -> 0546c5fce


LUCENE-8245: Change how crossings are computed.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0c715034
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0c715034
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0c715034

Branch: refs/heads/jira/solr-11833
Commit: 0c71503448a66e8766008ae0447e36115ffbdd08
Parents: 5b250b4
Author: Karl Wright <Da...@gmail.com>
Authored: Wed Apr 11 17:53:50 2018 -0400
Committer: Karl Wright <Da...@gmail.com>
Committed: Wed Apr 11 17:53:50 2018 -0400

----------------------------------------------------------------------
 .../spatial3d/geom/GeoComplexPolygon.java       | 315 +++++++++----------
 .../org/apache/lucene/spatial3d/geom/Plane.java |   5 +-
 .../lucene/spatial3d/geom/GeoPolygonTest.java   |   4 +-
 3 files changed, 143 insertions(+), 181 deletions(-)
----------------------------------------------------------------------
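Editor's note: a minimal, self-contained sketch of the crossing test this commit introduces (see countCrossings, edgeCrossesEnvelope, and findAdjoiningPoints in the diff below). The array-based points, the epsilon parameter, and the insideEnvelope predicate are hypothetical stand-ins for the Plane/Membership checks and Vector.MINIMUM_RESOLUTION used in the real code.

  class CrossingSketch {
    /** Step a tiny offset to either side of an intersection point along the edge plane's
     *  perpendicular; the edge is counted as crossing the envelope only if an odd number of
     *  those adjoining points still satisfies the envelope membership test. */
    static boolean crossesEnvelope(double[] p, double[] perpendicular, double epsilon,
                                   java.util.function.Predicate<double[]> insideEnvelope) {
      int withinCount = 0;
      for (int sign : new int[] {+1, -1}) {
        double[] adjoining = {
            p[0] + sign * perpendicular[0] * epsilon,
            p[1] + sign * perpendicular[1] * epsilon,
            p[2] + sign * perpendicular[2] * epsilon };
        if (insideEnvelope.test(adjoining)) {
          withinCount++;
        }
      }
      return (withinCount & 1) != 0; // odd count means a genuine crossing rather than a graze
    }
  }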


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0c715034/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
index 5e362f2..81443a5 100644
--- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
+++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
@@ -590,129 +590,6 @@ class GeoComplexPolygon extends GeoBasePolygon {
     }
   }
 
-  private final static GeoPoint[] NO_POINTS = new GeoPoint[0];
-  
-  /** Compute crossings of an envelope plane by an edge.
-  */
-  private GeoPoint[] computeCrossings(final Plane envelopePlane, final Edge edge, final Membership... envelopeBounds) {
-    // Note: there is a possibility that one or both endpoints of the edge actually lies in the inside/outside planes.  If this is the case, those endpoints may or may not show up as crossings.
-    // And yet, we absolutely cannot count each crossing more than once.  So, how do we proceed?
-    // The solution is to recognize that excluded crossings will be excluded for two reasons: (1) bounds, and (2) because there's only one solution to the intersection equation, which means
-    // we never actually crossed the envelope plane.  So, the way we proceed is to look for intersections, but NOT do edge bounds at all.  Then, we consider the results in the context of
-    // the plane we're trying to assess.
-    //System.out.println(" Computing crossings between "+envelopePlane+" and ["+edge.startPoint+"->"+edge.endPoint+"]");
-    
-    final GeoPoint[] unboundedIntersectionPoints = envelopePlane.findIntersections(planetModel, edge.plane);
-    if (unboundedIntersectionPoints == null) {
-      return null;
-    }
-    // Go through the intersection points one at a time.  Notes:
-    // (1) So that we don't double-count, we can only include at most one point in the result per intersection.
-    // (2) Single-solution results imply that the plane was not crossed.  The only time we consider them is if the edge ends on the plane, in which case we count it as a crossing.
-    // (3) We tried to detect the case where the edge ends on the envelope plane by seeing if the intersection point was numerically identical to an endpoint, but that
-    //    was still too strict.
-    // (4) The intersection points will be on both planes, for sure.  The question is whether an intersection point "lines up" with an edge endpoint.  If the edge endpoint
-    //    lies in the envelope plane, then we have the possibility of a detection.  The detection is confirmed if the distance "is small" between the edge endpoint and
-    //    the intersection point.  I see squared linear distance numbers of about 1.35e-24, which is still barely outside of the 1e-12 envelope, so a straight distance
-    //    check won't work.  So this is what I think we need to do:
-    //    (a) Check if endpoint is on envelope plane; if not, we keep going.
-    //    (b) If on envelope plane, we confirm that entire section of plane between intersection point and endpoint lies within envelope plane.  (How??)
-    
-    // If no points, just return.  (I'm not even sure this can happen)
-    if (unboundedIntersectionPoints.length == 0) {
-      //System.out.println("  None found.");
-      return unboundedIntersectionPoints;
-    }
-
-    // Single solution has special logic
-    if (unboundedIntersectionPoints.length == 1) {
-      //System.out.println("  One found.");
-      final GeoPoint thePoint = unboundedIntersectionPoints[0];
-      if (withinBounds(thePoint, envelopeBounds) &&
-        (pointMatches(envelopePlane, thePoint, edge.startPoint) || pointMatches(envelopePlane, thePoint, edge.endPoint) ||
-          (edge.startPlane.isWithin(thePoint) && edge.endPlane.isWithin(thePoint)))) {
-        return unboundedIntersectionPoints;
-      }
-      return NO_POINTS;
-    }
-      
-    // Two solutions: we could return none, one, the other one, or both.
-    //System.out.println("  Two found.");
-
-    final GeoPoint firstPoint = unboundedIntersectionPoints[0];
-    final GeoPoint secondPoint = unboundedIntersectionPoints[1];
-    
-    final boolean useFirstPoint;
-    if (withinBounds(firstPoint, envelopeBounds) &&
-        (pointMatches(envelopePlane, firstPoint, edge.startPoint) || pointMatches(envelopePlane, firstPoint, edge.endPoint) ||
-          (edge.startPlane.isWithin(firstPoint) && edge.endPlane.isWithin(firstPoint)))) {
-      //System.out.println("  Point "+firstPoint+" accepted.");
-      useFirstPoint = true;
-    } else {
-      /*System.out.println("  Point "+firstPoint+" rejected; withinBounds="+withinBounds(firstPoint, envelopeBounds)+
-        "; edgeBounds="+(edge.startPlane.isWithin(firstPoint) && edge.endPlane.isWithin(firstPoint))+
-        "; startPointDist="+edge.startPoint.linearDistanceSquared(firstPoint)+"; endPointDist="+edge.endPoint.linearDistanceSquared(firstPoint)); */
-      useFirstPoint = false;
-    }
-    
-    final boolean useSecondPoint;
-    if (withinBounds(secondPoint, envelopeBounds) &&
-        (pointMatches(envelopePlane, secondPoint, edge.startPoint) || pointMatches(envelopePlane, secondPoint, edge.endPoint) ||
-          (edge.startPlane.isWithin(secondPoint) && edge.endPlane.isWithin(secondPoint)))) {
-      //System.out.println("  Point "+secondPoint+" accepted.");
-      useSecondPoint = true;
-    } else {
-      /*System.out.println("  Point "+secondPoint+" rejected; withinBounds="+withinBounds(secondPoint, envelopeBounds)+
-        "; edgeBounds="+(edge.startPlane.isWithin(secondPoint) && edge.endPlane.isWithin(secondPoint))+
-        "; startPointDist="+edge.startPoint.linearDistanceSquared(secondPoint)+"; endPointDist="+edge.endPoint.linearDistanceSquared(secondPoint)); */
-      useSecondPoint = false;
-    }
-    
-    if (useFirstPoint && useSecondPoint) {
-      return unboundedIntersectionPoints;
-    }
-    
-    if (useFirstPoint) {
-      return new GeoPoint[]{firstPoint};
-    }
-    
-    if (useSecondPoint) {
-      return new GeoPoint[]{secondPoint};
-    }
-    
-    return NO_POINTS;
-  }
-
-  /** This distance is arbitrary, but it must NOT allow non-intersections to be detected.
-  */
-  private final static double MATCH_MAXIMUM_DISTANCE_SQUARED = Vector.MINIMUM_RESOLUTION_SQUARED * 2.0;
-
-  /** Return true if the point matches the edge endpoint, or false otherwise.
-  * This method is here to compensate for the fact that we don't always detect an intersection due to the bounds interfering.
-  */
-  private static boolean pointMatches(final Plane envelopePlane, final GeoPoint intersectionPoint, final GeoPoint edgePoint) {
-    // If edge isn't on the envelope plane, no match
-    if (!envelopePlane.evaluateIsZero(edgePoint)) {
-      return false;
-    }
-    // As a proxy for staying "within" the envelope plane,  compute linear squared distance.  If clearly too close to be anything other than local, we can
-    // just return true.  Otherwise, we'll need to add more complicated fallback computations.
-    if (edgePoint.linearDistanceSquared(intersectionPoint) <= MATCH_MAXIMUM_DISTANCE_SQUARED) {
-      return true;
-    }
-    // More to be done?  Cross that bridge if we come to it.
-    return false;
-  }
-  
-  private static boolean withinBounds(final GeoPoint point, final Membership[] bounds) {
-    for (final Membership bound : bounds) {
-      if (!bound.isWithin(point)) {
-        return false;
-      }
-    }
-    return true;
-  }
-  
   /**
    * An instance of this class describes a single edge, and includes what is necessary to reliably determine intersection
    * in the context of the even/odd algorithm used.
@@ -1083,19 +960,40 @@ class GeoComplexPolygon extends GeoBasePolygon {
       }
       
       // Determine crossings of this edge against all inside/outside planes.  There's no further need to look at the actual travel plane itself.
-      final GeoPoint[] aboveCrossings = computeCrossings(abovePlane, edge, bound);
-      final GeoPoint[] belowCrossings = computeCrossings(belowPlane, edge, bound);
-      
-      if (aboveCrossings != null) {
-        aboveCrossingCount += aboveCrossings.length;
-      }
-      if (belowCrossings != null) {
-        belowCrossingCount += belowCrossings.length;
-      }
+      aboveCrossingCount += countCrossings(edge, abovePlane, bound);
+      belowCrossingCount += countCrossings(edge, belowPlane, bound);
 
       return true;
     }
 
+    /** Find the intersections with an envelope plane, and assess those intersections for 
+      * whether they truly describe crossings.
+      */
+    private int countCrossings(final Edge edge,
+      final Plane envelopePlane, final Membership envelopeBound) {
+      final GeoPoint[] intersections = edge.plane.findIntersections(planetModel, envelopePlane, edge.startPlane, edge.endPlane, envelopeBound);
+      int crossings = 0;
+      if (intersections != null) {
+        for (final GeoPoint intersection : intersections) {
+          // It's unique, so assess it
+          crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
+        }
+      }
+      return crossings;
+    }
+
+    private boolean edgeCrossesEnvelope(final Plane edgePlane, final GeoPoint intersectionPoint) {
+      final GeoPoint[] adjoiningPoints = findAdjoiningPoints(edgePlane, intersectionPoint);
+      int withinCount = 0;
+      for (final GeoPoint adjoining : adjoiningPoints) {
+        if (plane.evaluateIsZero(adjoining) && bound.isWithin(adjoining)) {
+          withinCount++;
+        }
+      }
+      return (withinCount & 1) != 0;
+    }
+
+
   }
 
   /** Count the number of verifiable edge crossings for less than 1/2 a world.
@@ -1153,19 +1051,39 @@ class GeoComplexPolygon extends GeoBasePolygon {
       }
       
       // Determine crossings of this edge against all inside/outside planes.  There's no further need to look at the actual travel plane itself.
-      final GeoPoint[] aboveCrossings = computeCrossings(abovePlane, edge, bound1, bound2);
-      final GeoPoint[] belowCrossings = computeCrossings(belowPlane, edge, bound1, bound2);
-      
-      if (aboveCrossings != null) {
-        aboveCrossingCount += aboveCrossings.length;
-      }
-      if (belowCrossings != null) {
-        belowCrossingCount += belowCrossings.length;
-      }
+      aboveCrossingCount += countCrossings(edge, abovePlane, bound1, bound2);
+      belowCrossingCount += countCrossings(edge, belowPlane, bound1, bound2);
 
       return true;
     }
 
+    /** Find the intersections with an envelope plane, and assess those intersections for 
+      * whether they truly describe crossings.
+      */
+    private int countCrossings(final Edge edge,
+      final Plane envelopePlane, final Membership envelopeBound1, final Membership envelopeBound2) {
+      final GeoPoint[] intersections = edge.plane.findIntersections(planetModel, envelopePlane, edge.startPlane, edge.endPlane, envelopeBound1, envelopeBound2);
+      int crossings = 0;
+      if (intersections != null) {
+        for (final GeoPoint intersection : intersections) {
+          // It's unique, so assess it
+          crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
+        }
+      }
+      return crossings;
+    }
+
+    private boolean edgeCrossesEnvelope(final Plane edgePlane, final GeoPoint intersectionPoint) {
+      final GeoPoint[] adjoiningPoints = findAdjoiningPoints(edgePlane, intersectionPoint);
+      int withinCount = 0;
+      for (final GeoPoint adjoining : adjoiningPoints) {
+        if (plane.evaluateIsZero(adjoining) && bound1.isWithin(adjoining) && bound2.isWithin(adjoining)) {
+          withinCount++;
+        }
+      }
+      return (withinCount & 1) != 0;
+    }
+
   }
   
   /** Count the number of verifiable edge crossings for a dual-leg journey.
@@ -1379,7 +1297,7 @@ class GeoComplexPolygon extends GeoBasePolygon {
       // We've never seen this edge before.  Evaluate it in the context of inner and outer planes.
       computeInsideOutside();
 
-      /*
+      /* 
       System.out.println("\nThe following edges should intersect the travel/testpoint planes:");
       Edge thisEdge = edge;
       while (true) {
@@ -1397,10 +1315,11 @@ class GeoComplexPolygon extends GeoBasePolygon {
         }
       }
       */
+      /*
+      System.out.println("");
+      System.out.println("Considering edge "+(edge.startPoint)+" -> "+(edge.endPoint));
+      */
       
-      //System.out.println("");
-      //System.out.println("Considering edge "+(edge.startPoint)+" -> "+(edge.endPoint));
-
       // Some edges are going to be given to us even when there's no real intersection, so do that as a sanity check, first.
       final GeoPoint[] travelCrossings = travelPlane.findIntersections(planetModel, edge.plane, checkPointCutoffPlane, checkPointOtherCutoffPlane, edge.startPlane, edge.endPlane);
       if (travelCrossings != null && travelCrossings.length == 0) {
@@ -1441,51 +1360,97 @@ class GeoComplexPolygon extends GeoBasePolygon {
       */
       
       // Determine crossings of this edge against all inside/outside planes.  There's no further need to look at the actual travel plane itself.
+      //System.out.println(" Assessing inner crossings...");
+      innerCrossingCount += countCrossings(edge, travelInsidePlane, checkPointCutoffPlane, insideTravelCutoffPlane, testPointInsidePlane, testPointCutoffPlane, insideTestPointCutoffPlane);
+      //System.out.println(" Assessing outer crossings...");
+      outerCrossingCount += countCrossings(edge, travelOutsidePlane, checkPointCutoffPlane, outsideTravelCutoffPlane, testPointOutsidePlane, testPointCutoffPlane, outsideTestPointCutoffPlane);
+      /*
       final GeoPoint[] travelInnerCrossings = computeCrossings(travelInsidePlane, edge, checkPointCutoffPlane, insideTravelCutoffPlane);
       final GeoPoint[] travelOuterCrossings = computeCrossings(travelOutsidePlane, edge, checkPointCutoffPlane, outsideTravelCutoffPlane);
       final GeoPoint[] testPointInnerCrossings = computeCrossings(testPointInsidePlane, edge, testPointCutoffPlane, insideTestPointCutoffPlane);
       final GeoPoint[] testPointOuterCrossings = computeCrossings(testPointOutsidePlane, edge, testPointCutoffPlane, outsideTestPointCutoffPlane);
+      */
       
-      // If the edge goes through the inner-inner intersection point, or the outer-outer intersection point, we need to be sure we count that only once.
-      // It may appear in both lists.  Use a hash for this right now.
-      final Set<GeoPoint> countingHash = new HashSet<>(2);
-      
-      if (travelInnerCrossings != null) {
-        for (final GeoPoint crossing : travelInnerCrossings) {
-          //System.out.println("  Travel inner point "+crossing+"; edgeplane="+edge.plane.evaluate(crossing)+"; travelInsidePlane="+travelInsidePlane.evaluate(crossing)+"; edgestartplane="+edge.startPlane.evaluate(crossing)+"; edgeendplane="+edge.endPlane.evaluate(crossing));
-          countingHash.add(crossing);
-        }
-      }
-      if (testPointInnerCrossings != null) {
-        for (final GeoPoint crossing : testPointInnerCrossings) {
-          //System.out.println("  Test point inner point "+crossing+"; edgeplane="+edge.plane.evaluate(crossing)+"; testPointInsidePlane="+testPointInsidePlane.evaluate(crossing)+"; edgestartplane="+edge.startPlane.evaluate(crossing)+"; edgeendplane="+edge.endPlane.evaluate(crossing));
-          countingHash.add(crossing);
+      return true;
+    }
+
+    /** Find the intersections with a pair of envelope planes, and assess those intersections for duplication and for
+      * whether they truly describe crossings.
+      */
+    private int countCrossings(final Edge edge,
+      final Plane travelEnvelopePlane, final Membership travelEnvelopeBound1, final Membership travelEnvelopeBound2,
+      final Plane testPointEnvelopePlane, final Membership testPointEnvelopeBound1, final Membership testPointEnvelopeBound2) {
+      final GeoPoint[] travelIntersections = edge.plane.findIntersections(planetModel, travelEnvelopePlane, edge.startPlane, edge.endPlane, travelEnvelopeBound1, travelEnvelopeBound2);
+      final GeoPoint[] testPointIntersections = edge.plane.findIntersections(planetModel, testPointEnvelopePlane, edge.startPlane, edge.endPlane, testPointEnvelopeBound1, testPointEnvelopeBound2);
+      int crossings = 0;
+      if (travelIntersections != null) {
+        for (final GeoPoint intersection : travelIntersections) {
+          // Make sure it's not a dup
+          boolean notDup = true;
+          if (testPointIntersections != null) {
+            for (final GeoPoint otherIntersection : testPointIntersections) {
+              if (intersection.isNumericallyIdentical(otherIntersection)) {
+                notDup = false;
+                break;
+              }
+            }
+          }
+          if (!notDup) {
+            continue;
+          }
+          // It's unique, so assess it
+          //System.out.println("  Assessing travel intersection point "+intersection+"...");
+          crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
         }
       }
-      //System.out.println(" Edge added "+countingHash.size()+" to innerCrossingCount");
-      innerCrossingCount += countingHash.size();
-      
-      countingHash.clear();
-      if (travelOuterCrossings != null) {
-        for (final GeoPoint crossing : travelOuterCrossings) {
-          //System.out.println("  Travel outer point "+crossing+"; edgeplane="+edge.plane.evaluate(crossing)+"; travelOutsidePlane="+travelOutsidePlane.evaluate(crossing)+"; edgestartplane="+edge.startPlane.evaluate(crossing)+"; edgeendplane="+edge.endPlane.evaluate(crossing));
-          countingHash.add(crossing);
+      if (testPointIntersections != null) {
+        for (final GeoPoint intersection : testPointIntersections) {
+          // It's unique, so assess it
+          //System.out.println("  Assessing testpoint intersection point "+intersection+"...");
+          crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
         }
       }
-      if (testPointOuterCrossings != null) {
-        for (final GeoPoint crossing : testPointOuterCrossings) {
-          //System.out.println("  Test point outer point "+crossing+"; edgeplane="+edge.plane.evaluate(crossing)+"; testPointOutsidePlane="+testPointOutsidePlane.evaluate(crossing)+"; edgestartplane="+edge.startPlane.evaluate(crossing)+"; edgeendplane="+edge.endPlane.evaluate(crossing));
-          countingHash.add(crossing);
+      return crossings;
+    }
+
+    /** Return true if the edge crosses the envelope plane, given the envelope intersection point.
+      */
+    private boolean edgeCrossesEnvelope(final Plane edgePlane, final GeoPoint intersectionPoint) {
+      final GeoPoint[] adjoiningPoints = findAdjoiningPoints(edgePlane, intersectionPoint);
+      int withinCount = 0;
+      for (final GeoPoint adjoining : adjoiningPoints) {
+        if ((travelPlane.evaluateIsZero(adjoining) && checkPointCutoffPlane.isWithin(adjoining) && checkPointOtherCutoffPlane.isWithin(adjoining)) ||
+          (testPointPlane.evaluateIsZero(adjoining) && testPointCutoffPlane.isWithin(adjoining) && testPointOtherCutoffPlane.isWithin(adjoining))) {
+          //System.out.println("   Adjoining point "+adjoining+" (dist = "+intersectionPoint.linearDistance(adjoining)+") is within");
+          withinCount++;
+        } else {
+          //System.out.println("   Adjoining point "+adjoining+" (dist = "+intersectionPoint.linearDistance(adjoining)+") is not within");
         }
       }
-      //System.out.println(" Edge added "+countingHash.size()+" to outerCrossingCount");
-      outerCrossingCount += countingHash.size();
-
-      return true;
+      return (withinCount & 1) != 0;
     }
 
   }
   
+    
+  
+  /** Given a point on the plane and the ellipsoid, this method looks for a pair of adjoining points on either side of the plane, which are
+   * about MINIMUM_RESOLUTION away from the given point.  This only works for planes which go through the center of the world.
+   */
+  private GeoPoint[] findAdjoiningPoints(final Plane plane, final GeoPoint pointOnPlane) {
+    // Compute a normalized perpendicular vector
+    final Vector perpendicular = new Vector(plane, pointOnPlane);
+    // Compute two new points along this vector from the original
+    final GeoPoint pointA = planetModel.createSurfacePoint(pointOnPlane.x + perpendicular.x * Vector.MINIMUM_RESOLUTION,
+      pointOnPlane.y + perpendicular.y * Vector.MINIMUM_RESOLUTION,
+      pointOnPlane.z + perpendicular.z * Vector.MINIMUM_RESOLUTION);
+    final GeoPoint pointB = planetModel.createSurfacePoint(pointOnPlane.x - perpendicular.x * Vector.MINIMUM_RESOLUTION,
+      pointOnPlane.y - perpendicular.y * Vector.MINIMUM_RESOLUTION,
+      pointOnPlane.z - perpendicular.z * Vector.MINIMUM_RESOLUTION);
+    //System.out.println("Distance: "+computeSquaredDistance(rval[0], pointOnPlane)+" and "+computeSquaredDistance(rval[1], pointOnPlane));
+    return new GeoPoint[]{pointA, pointB};
+  }
+
   private static double computeSquaredDistance(final GeoPoint checkPoint, final GeoPoint intersectionPoint) {
     final double distanceX = checkPoint.x - intersectionPoint.x;
     final double distanceY = checkPoint.y - intersectionPoint.y;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0c715034/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/Plane.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/Plane.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/Plane.java
index b47cffd..d03e624 100755
--- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/Plane.java
+++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/Plane.java
@@ -23,9 +23,6 @@ package org.apache.lucene.spatial3d.geom;
  * @lucene.experimental
  */
 public class Plane extends Vector {
-  /** For plane envelopes, we need a small distance that can't lead to numerical confusion.  This spacing is large enough to
-    * avoid numerical confusion, but still permit all points within the envelope to belong to one or another plane. */
-  public final static double MINIMUM_PLANE_OFFSET = MINIMUM_RESOLUTION * 2.0;
   /** An array with no points in it */
   public final static GeoPoint[] NO_POINTS = new GeoPoint[0];
   /** An array with no bounds in it */
@@ -117,7 +114,7 @@ public class Plane extends Vector {
    *   or false in the negative direction.
    */
   public Plane(final Plane basePlane, final boolean above) {
-    this(basePlane.x, basePlane.y, basePlane.z, above?Math.nextUp(basePlane.D + MINIMUM_PLANE_OFFSET):Math.nextDown(basePlane.D - MINIMUM_PLANE_OFFSET));
+    this(basePlane.x, basePlane.y, basePlane.z, above?Math.nextUp(basePlane.D + MINIMUM_RESOLUTION):Math.nextDown(basePlane.D - MINIMUM_RESOLUTION));
   }
   
   /** Construct the most accurate normalized plane through an x-y point and including the Z axis.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0c715034/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
index 03d1e9e..1311f4e 100755
--- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
+++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
@@ -25,7 +25,6 @@ import org.apache.lucene.util.LuceneTestCase;
 
 import org.junit.Test;
 
-
 public class GeoPolygonTest extends LuceneTestCase {
 
   @Test
@@ -1429,6 +1428,7 @@ shape:
   }
 
   @Test
+  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8245")
   public void testComplexPolygonPlaneOutsideWorld() {
     List<GeoPoint> points = new ArrayList<>();
     points.add(new GeoPoint(PlanetModel.SPHERE, -0.5, -0.5));
@@ -1503,6 +1503,7 @@ shape:
   }
 
   @Test
+  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8245")
   public void testLUCENE8245() {
     //POLYGON((-70.19447784626787 -83.117346007187,0.0 2.8E-322,-139.99870438810106 7.994601469571884,-143.14292702670522 -18.500141088122664,-158.7373186858464 -35.42942085357812,-70.19447784626787 -83.117346007187))
     final List<GeoPoint> points = new ArrayList<>();
@@ -1537,7 +1538,6 @@ shape:
   }
 
   @Test
-  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8245")
   public void testLUCENE8245_case3() {
     //POLYGON((144.76249846857021 8.828705232593283,166.00162989841027 -8.5E-322,157.03429484830787 64.92565566857392,108.64696979831984 39.10241638996957,102.54234512410089 20.471658760034586,144.76249846857021 8.828705232593283))
     final List<GeoPoint> points = new ArrayList<>();


[34/46] lucene-solr:jira/solr-11833: SOLR-10513: Implement .equals() for LuceneLevenshteinDistance.

Posted by ab...@apache.org.
SOLR-10513: Implement .equals() for LuceneLevenshteinDistance.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/12bd5f94
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/12bd5f94
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/12bd5f94

Branch: refs/heads/jira/solr-11833
Commit: 12bd5f9448f70b9fdc450dac916dbd1a83edafbc
Parents: 9a149ad
Author: jdyer1 <jd...@apache.org>
Authored: Fri Apr 13 11:10:21 2018 -0500
Committer: jdyer1 <jd...@apache.org>
Committed: Fri Apr 13 11:12:46 2018 -0500

----------------------------------------------------------------------
 .../search/spell/LuceneLevenshteinDistance.java |  8 +++++
 solr/CHANGES.txt                                |  2 ++
 .../spelling/ConjunctionSolrSpellChecker.java   |  3 +-
 .../ConjunctionSolrSpellCheckerTest.java        | 35 ++++++++++++++++----
 4 files changed, 40 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
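Editor's note: the practical effect of the equals() override added below, shown as a tiny standalone check rather than code from the patch. ConjunctionSolrSpellChecker requires the StringDistance of every added checker to be equal, so two independently constructed LuceneLevenshteinDistance instances must now compare equal instead of falling back to Object identity.

  import org.apache.lucene.search.spell.LuceneLevenshteinDistance;
  import org.apache.lucene.search.spell.StringDistance;

  public class EqualsCheck {
    public static void main(String[] args) {
      StringDistance a = new LuceneLevenshteinDistance();
      StringDistance b = new LuceneLevenshteinDistance();
      // Before this commit this printed false (inherited Object.equals); now it prints true.
      System.out.println(a.equals(b));
    }
  }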


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/12bd5f94/lucene/suggest/src/java/org/apache/lucene/search/spell/LuceneLevenshteinDistance.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/spell/LuceneLevenshteinDistance.java b/lucene/suggest/src/java/org/apache/lucene/search/spell/LuceneLevenshteinDistance.java
index 3ba0bd3..21ca535 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/spell/LuceneLevenshteinDistance.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/spell/LuceneLevenshteinDistance.java
@@ -114,4 +114,12 @@ public final class LuceneLevenshteinDistance implements StringDistance {
     }
     return ref;
   }
+  
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) return true;
+    if (null == obj) return false;
+    return (getClass() == obj.getClass());
+  }
+  
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/12bd5f94/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 999d2f6..399a2a7 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -154,6 +154,8 @@ Bug Fixes
   (Rohit, Varun Thacker)
 
 * SOLR-12150: Fix a test bug in CdcrBidirectionalTest.testBiDir (Steve Rowe, Amrit Sarkar via Varun Thacker)
+
+* SOLR-10513: ConjunctionSolrSpellChecker did not work with LuceneLevenshteinDistance (Amrit Sarkar via James Dyer)
  
 Optimizations
 ----------------------

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/12bd5f94/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java
index 2daab28..f92919d 100644
--- a/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java
+++ b/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java
@@ -81,9 +81,8 @@ public class ConjunctionSolrSpellChecker extends SolrSpellChecker {
     checkers.add(checker);
   }
   
-  @SuppressWarnings("unchecked")
   @Override
-  public String init(NamedList config, SolrCore core) {
+  public String init(@SuppressWarnings("rawtypes") NamedList config, SolrCore core) {
     for (int i = 0; i < checkers.size(); i++) {
       SolrSpellChecker c = checkers.get(i);
       String dn = c.init(config, core);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/12bd5f94/solr/core/src/test/org/apache/solr/spelling/ConjunctionSolrSpellCheckerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/spelling/ConjunctionSolrSpellCheckerTest.java b/solr/core/src/test/org/apache/solr/spelling/ConjunctionSolrSpellCheckerTest.java
index 0df837f..8ec933b 100644
--- a/solr/core/src/test/org/apache/solr/spelling/ConjunctionSolrSpellCheckerTest.java
+++ b/solr/core/src/test/org/apache/solr/spelling/ConjunctionSolrSpellCheckerTest.java
@@ -18,27 +18,50 @@ package org.apache.solr.spelling;
 
 import java.io.IOException;
 
+import org.apache.lucene.search.spell.JaroWinklerDistance;
 import org.apache.lucene.search.spell.LevenshteinDistance;
+import org.apache.lucene.search.spell.LuceneLevenshteinDistance;
 import org.apache.lucene.search.spell.NGramDistance;
 import org.apache.lucene.search.spell.StringDistance;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.search.SolrIndexSearcher;
+import org.junit.Assert;
 import org.junit.Test;
 
 public class ConjunctionSolrSpellCheckerTest extends LuceneTestCase {
   
+  public static final Class<?>[] AVAILABLE_DISTANCES = {LevenshteinDistance.class, LuceneLevenshteinDistance.class,
+      JaroWinklerDistance.class, NGramDistance.class};
+
   @Test
   public void test() throws Exception {
     ConjunctionSolrSpellChecker cssc = new ConjunctionSolrSpellChecker();
-    MockSolrSpellChecker levenstein1 = new MockSolrSpellChecker(new LevenshteinDistance());
-    MockSolrSpellChecker levenstein2 = new MockSolrSpellChecker(new LevenshteinDistance());
-    MockSolrSpellChecker ngram = new MockSolrSpellChecker(new NGramDistance());
+    @SuppressWarnings("unchecked")
+    Class<StringDistance> sameDistance = (Class<StringDistance>) AVAILABLE_DISTANCES[random().nextInt(AVAILABLE_DISTANCES.length)];
+    
+    StringDistance sameDistance1 = sameDistance.newInstance();
+    StringDistance sameDistance2 = sameDistance.newInstance();
+    
+    //NGramDistance defaults to 2, so we'll try 3 or 4 to ensure we have one that is not-equal.
+    StringDistance differentDistance = new NGramDistance(3);
+    if(sameDistance1.equals(differentDistance)) {
+      differentDistance = new NGramDistance(4);
+      if(sameDistance1.equals(differentDistance)) {
+        fail("Cannot set up test.  2 NGramDistances with different gram sizes should not be equal.");
+      }
+    }
+    Assert.assertEquals("The distance " + sameDistance + " does not properly implement equals.", sameDistance1, sameDistance2);
+    
+    
+    MockSolrSpellChecker checker1 = new MockSolrSpellChecker(sameDistance1);
+    MockSolrSpellChecker checker2 = new MockSolrSpellChecker(sameDistance2);
+    MockSolrSpellChecker checker3 = new MockSolrSpellChecker(differentDistance);
     
-    cssc.addChecker(levenstein1);
-    cssc.addChecker(levenstein2);
+    cssc.addChecker(checker1);
+    cssc.addChecker(checker2);
     try {
-      cssc.addChecker(ngram);
+      cssc.addChecker(checker3);
       fail("ConjunctionSolrSpellChecker should have thrown an exception about non-identical StringDistances.");
     } catch (IllegalArgumentException iae) {
       // correct behavior


[22/46] lucene-solr:jira/solr-11833: LUCENE-8251: Add an explicit test case to cover the discovered failure. But it appears to be already fixed.

Posted by ab...@apache.org.
LUCENE-8251: Add an explicit test case to cover the discovered failure.  But it appears to be already fixed.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/f41e7c4d
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/f41e7c4d
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/f41e7c4d

Branch: refs/heads/jira/solr-11833
Commit: f41e7c4da7e5386541c9ad2cf0cf6a98d0d41c54
Parents: e8f1649
Author: Karl Wright <Da...@gmail.com>
Authored: Fri Apr 13 00:10:57 2018 -0400
Committer: Karl Wright <Da...@gmail.com>
Committed: Fri Apr 13 00:10:57 2018 -0400

----------------------------------------------------------------------
 .../apache/lucene/spatial3d/geom/Geo3DUtil.java   |  7 +++++++
 .../lucene/spatial3d/geom/GeoPolygonTest.java     | 18 ++++++++++++++++++
 .../spatial3d/geom/RandomGeoPolygonTest.java      |  2 +-
 3 files changed, 26 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
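Editor's note: the Geo3DUtil additions below are plain unit-conversion helpers; for example, toDegrees(Math.PI / 2) yields approximately 90.0, the inverse of the existing fromDegrees used throughout these tests.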


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f41e7c4d/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/Geo3DUtil.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/Geo3DUtil.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/Geo3DUtil.java
index add46e4..9a22abd 100644
--- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/Geo3DUtil.java
+++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/Geo3DUtil.java
@@ -40,6 +40,8 @@ class Geo3DUtil {
   final static double RADIANS_PER_METER = 1.0 / PlanetModel.WGS84_MEAN;
   /** How many radians are in one degree */
   final static double RADIANS_PER_DEGREE = Math.PI / 180.0;
+  /** How many degrees in a radian */
+  final static double DEGREES_PER_RADIAN = 180.0 / Math.PI;
   
   private static final double MAX_VALUE = PlanetModel.WGS84.getMaximumMagnitude();
   private static final int BITS = 32;
@@ -113,6 +115,11 @@ class Geo3DUtil {
   static double fromDegrees(final double degrees) {
     return degrees * RADIANS_PER_DEGREE;
   }
+
+  /** Converts radians to degrees */
+  static double toDegrees(final double radians) {
+    return radians * DEGREES_PER_RADIAN;
+  }
   
   /** Converts earth-surface meters to radians */
   static double fromMeters(final double meters) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f41e7c4d/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
index adff16c..e720cb2 100755
--- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
+++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
@@ -1569,4 +1569,22 @@ shape:
     assertTrue(polygon.isWithin(point) == largePolygon.isWithin(point));
   }
   
+  @Test
+  public void testLUCENE8251() {
+    //POLYGON((135.63207358036593 -51.43541696593334,113.00782694696038 -58.984559858566556,0.0 -3.68E-321,-66.33598777585381 -7.382056816201731,135.63207358036593 -51.43541696593334))
+    final List<GeoPoint> points = new ArrayList<>();
+    points.add(new GeoPoint(PlanetModel.SPHERE, Geo3DUtil.fromDegrees(-51.43541696593334), Geo3DUtil.fromDegrees(135.63207358036593)));
+    points.add(new GeoPoint(PlanetModel.SPHERE, Geo3DUtil.fromDegrees(-58.984559858566556), Geo3DUtil.fromDegrees(113.00782694696038)));
+    points.add(new GeoPoint(PlanetModel.SPHERE, Geo3DUtil.fromDegrees(-3.68E-321), Geo3DUtil.fromDegrees(0.0)));
+    points.add(new GeoPoint(PlanetModel.SPHERE, Geo3DUtil.fromDegrees(-7.382056816201731), Geo3DUtil.fromDegrees(-66.33598777585381)));
+    points.add(new GeoPoint(PlanetModel.SPHERE, Geo3DUtil.fromDegrees(-51.43541696593334), Geo3DUtil.fromDegrees(135.63207358036593)));
+    final GeoPolygonFactory.PolygonDescription description = new GeoPolygonFactory.PolygonDescription(points);
+    final GeoPolygon polygon = GeoPolygonFactory.makeGeoPolygon(PlanetModel.SPHERE, description);
+    final GeoPolygon largePolygon = GeoPolygonFactory.makeLargeGeoPolygon(PlanetModel.SPHERE, Collections.singletonList(description));
+
+    //POINT(0.005183505059185348 1.98E-321)
+    final GeoPoint point = new GeoPoint(PlanetModel.SPHERE, Geo3DUtil.fromDegrees(1.98E-321), Geo3DUtil.fromDegrees(0.005183505059185348));
+    assertTrue(polygon.isWithin(point) == largePolygon.isWithin(point));
+  }
+  
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f41e7c4d/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
index b6364e0..6c5e890 100644
--- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
+++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
@@ -92,7 +92,7 @@ public class RandomGeoPolygonTest extends RandomGeo3dShapeGenerator {
    * biased doubles.
    */
   @Test
-  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8251")
+  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8251")
   @Repeat(iterations = 10)
   public void testComparePolygons() {
     final PlanetModel planetModel = randomPlanetModel();


[03/46] lucene-solr:jira/solr-11833: SOLR-11982: Add support for indicating preferred replica types for queries

Posted by ab...@apache.org.
SOLR-11982: Add support for indicating preferred replica types for queries


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/8927d469
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/8927d469
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/8927d469

Branch: refs/heads/jira/solr-11833
Commit: 8927d469cb05c27db6864076010de039302c7e25
Parents: 5bd7b03
Author: Tomas Fernandez Lobbe <tf...@apache.org>
Authored: Wed Apr 11 16:23:00 2018 -0700
Committer: Tomas Fernandez Lobbe <tf...@apache.org>
Committed: Wed Apr 11 16:23:00 2018 -0700

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   3 +
 .../component/HttpShardHandlerFactory.java      | 165 +++++++++++++------
 .../component/TestHttpShardHandlerFactory.java  | 119 +++++++++++++
 .../src/distributed-requests.adoc               |  40 +++++
 .../shards-and-indexing-data-in-solrcloud.adoc  |   4 +
 .../apache/solr/common/params/CommonParams.java |   1 +
 .../apache/solr/common/params/ShardParams.java  |  12 ++
 .../client/solrj/impl/CloudSolrClientTest.java  | 143 ++++++++++++++--
 8 files changed, 425 insertions(+), 62 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8927d469/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 84cac13..c07c1f7 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -90,6 +90,9 @@ New Features
 
 * SOLR-12181: Add index size autoscaling trigger, based on document count or size in bytes. (ab)
 
+* SOLR-11982: Add possibility to define replica order with the shards.preference parameter to e.g. prefer PULL replicas
+  for distributed queries. (Ere Maijala, Tomás Fernández Löbbe)
+
 Bug Fixes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8927d469/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java
index 8cff025..4e2a794 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java
@@ -29,12 +29,12 @@ import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.ShardParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.ExecutorUtil;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.common.util.URLUtil;
-import org.apache.solr.core.CoreDescriptor;
 import org.apache.solr.core.PluginInfo;
 import org.apache.solr.core.SolrInfoBean;
 import org.apache.solr.metrics.SolrMetricManager;
@@ -51,6 +51,7 @@ import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.List;
@@ -303,30 +304,61 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
   /**
    * A distributed request is made via {@link LBHttpSolrClient} to the first live server in the URL list.
    * This means it is just as likely to choose current host as any of the other hosts.
-   * This function makes sure that the cores of current host are always put first in the URL list.
-   * If all nodes prefer local-cores then a bad/heavily-loaded node will receive less requests from healthy nodes.
-   * This will help prevent a distributed deadlock or timeouts in all the healthy nodes due to one bad node.
+   * This function makes sure that the cores are sorted according to the given list of preferences.
+   * E.g. If all nodes prefer local cores then a bad/heavily-loaded node will receive less requests from 
+   * healthy nodes. This will help prevent a distributed deadlock or timeouts in all the healthy nodes due 
+   * to one bad node.
    */
-  private static class IsOnPreferredHostComparator implements Comparator<Object> {
-    final private String preferredHostAddress;
-    public IsOnPreferredHostComparator(String preferredHostAddress) {
-      this.preferredHostAddress = preferredHostAddress;
+  static class NodePreferenceRulesComparator implements Comparator<Object> {
+    private static class PreferenceRule {
+      public final String name;
+      public final String value;
+
+      public PreferenceRule(String name, String value) {
+        this.name = name;
+        this.value = value;
+      }
+    }
+
+    private final SolrQueryRequest request;
+    private List<PreferenceRule> preferenceRules;
+    private String localHostAddress = null;
+
+    public NodePreferenceRulesComparator(final List<String> sortRules, final SolrQueryRequest request) {
+      this.request = request;
+      this.preferenceRules = new ArrayList<PreferenceRule>(sortRules.size());
+      sortRules.forEach(rule -> {
+        String[] parts = rule.split(":", 2);
+        if (parts.length != 2) {
+          throw new IllegalArgumentException("Invalid " + ShardParams.SHARDS_PREFERENCE + " rule: " + rule);
+        }
+        this.preferenceRules.add(new PreferenceRule(parts[0], parts[1])); 
+      });
     }
     @Override
     public int compare(Object left, Object right) {
-      final boolean lhs = hasPrefix(objectToString(left));
-      final boolean rhs = hasPrefix(objectToString(right));
-      if (lhs != rhs) {
-        if (lhs) {
-          return -1;
-        } else {
-          return +1;
+      for (PreferenceRule preferenceRule: this.preferenceRules) {
+        final boolean lhs;
+        final boolean rhs;
+        switch (preferenceRule.name) {
+          case ShardParams.SHARDS_PREFERENCE_REPLICA_TYPE:
+            lhs = hasReplicaType(left, preferenceRule.value);
+            rhs = hasReplicaType(right, preferenceRule.value);
+            break;
+          case ShardParams.SHARDS_PREFERENCE_REPLICA_LOCATION:
+            lhs = hasCoreUrlPrefix(left, preferenceRule.value);
+            rhs = hasCoreUrlPrefix(right, preferenceRule.value);
+            break;
+          default:
+            throw new IllegalArgumentException("Invalid " + ShardParams.SHARDS_PREFERENCE + " type: " + preferenceRule.name);
+        }
+        if (lhs != rhs) {
+          return lhs ? -1 : +1;
         }
-      } else {
-        return 0;
       }
+      return 0;
     }
-    private String objectToString(Object o) {
+    private boolean hasCoreUrlPrefix(Object o, String prefix) {
       final String s;
       if (o instanceof String) {
         s = (String)o;
@@ -334,44 +366,80 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
       else if (o instanceof Replica) {
         s = ((Replica)o).getCoreUrl();
       } else {
-        s = null;
+        return false;
       }
-      return s;
+      if (prefix.equals(ShardParams.REPLICA_LOCAL)) {
+        if (null == localHostAddress) {
+          final ZkController zkController = this.request.getCore().getCoreContainer().getZkController();
+          localHostAddress = zkController != null ? zkController.getBaseUrl() : "";
+          if (localHostAddress.isEmpty()) {
+            log.warn("Couldn't determine current host address for sorting of local replicas");
+          }
+        }
+        if (!localHostAddress.isEmpty()) {
+          if (s.startsWith(localHostAddress)) {
+            return true;
+          }
+        }
+      } else {
+        if (s.startsWith(prefix)) {
+          return true;
+        }
+      }
+      return false;
     }
-    private boolean hasPrefix(String s) {
-      return s != null && s.startsWith(preferredHostAddress);
+    private static boolean hasReplicaType(Object o, String preferred) {
+      if (!(o instanceof Replica)) {
+        return false;
+      }
+      final String s = ((Replica)o).getType().toString();
+      return s.equals(preferred);
     }
   }
-  protected ReplicaListTransformer getReplicaListTransformer(final SolrQueryRequest req)
-  {
+
+  protected ReplicaListTransformer getReplicaListTransformer(final SolrQueryRequest req) {
     final SolrParams params = req.getParams();
+    @SuppressWarnings("deprecation")
+    final boolean preferLocalShards = params.getBool(CommonParams.PREFER_LOCAL_SHARDS, false);
+    final String shardsPreferenceSpec = params.get(ShardParams.SHARDS_PREFERENCE, "");
+
+    if (preferLocalShards || !shardsPreferenceSpec.isEmpty()) {
+      if (preferLocalShards && !shardsPreferenceSpec.isEmpty()) {
+        throw new SolrException(
+          SolrException.ErrorCode.BAD_REQUEST,
+          "preferLocalShards is deprecated and must not be used with shards.preference" 
+        );
+      }
+      List<String> preferenceRules = StrUtils.splitSmart(shardsPreferenceSpec, ',');
+      if (preferLocalShards) {
+        preferenceRules.add(ShardParams.SHARDS_PREFERENCE_REPLICA_LOCATION + ":" + ShardParams.REPLICA_LOCAL);
+      }
 
-    if (params.getBool(CommonParams.PREFER_LOCAL_SHARDS, false)) {
-      final CoreDescriptor coreDescriptor = req.getCore().getCoreDescriptor();
-      final ZkController zkController = req.getCore().getCoreContainer().getZkController();
-      final String preferredHostAddress = (zkController != null) ? zkController.getBaseUrl() : null;
-      if (preferredHostAddress == null) {
-        log.warn("Couldn't determine current host address to prefer local shards");
-      } else {
-        return new ShufflingReplicaListTransformer(r) {
-          @Override
-          public void transform(List<?> choices)
-          {
-            if (choices.size() > 1) {
-              super.transform(choices);
-              if (log.isDebugEnabled()) {
-                log.debug("Trying to prefer local shard on {} among the choices: {}",
-                    preferredHostAddress, Arrays.toString(choices.toArray()));
-              }
-              choices.sort(new IsOnPreferredHostComparator(preferredHostAddress));
-              if (log.isDebugEnabled()) {
-                log.debug("Applied local shard preference for choices: {}",
-                    Arrays.toString(choices.toArray()));
-              }
+      return new ShufflingReplicaListTransformer(r) {
+        @Override
+        public void transform(List<?> choices)
+        {
+          if (choices.size() > 1) {
+            super.transform(choices);
+            if (log.isDebugEnabled()) {
+              log.debug("Applying the following sorting preferences to replicas: {}",
+                  Arrays.toString(preferenceRules.toArray()));
+            }
+            try {
+              choices.sort(new NodePreferenceRulesComparator(preferenceRules, req));
+            } catch (IllegalArgumentException iae) {
+              throw new SolrException(
+                SolrException.ErrorCode.BAD_REQUEST,
+                iae.getMessage()
+              );
+            }
+            if (log.isDebugEnabled()) {
+              log.debug("Applied sorting preferences to replica list: {}",
+                  Arrays.toString(choices.toArray()));
             }
           }
-        };
-      }
+        }
+      };
     }
 
     return shufflingReplicaListTransformer;
@@ -409,4 +477,5 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
         manager.registry(registry),
         SolrMetricManager.mkName("httpShardExecutor", expandedScope, "threadPool"));
   }
+
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8927d469/solr/core/src/test/org/apache/solr/handler/component/TestHttpShardHandlerFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/component/TestHttpShardHandlerFactory.java b/solr/core/src/test/org/apache/solr/handler/component/TestHttpShardHandlerFactory.java
index 3ffa015..523e31d 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/TestHttpShardHandlerFactory.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/TestHttpShardHandlerFactory.java
@@ -24,6 +24,10 @@ import java.util.List;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.client.solrj.impl.LBHttpSolrClient;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.params.ShardParams;
+import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.handler.component.HttpShardHandlerFactory;
 import org.apache.solr.handler.component.ShardHandlerFactory;
@@ -99,4 +103,119 @@ public class TestHttpShardHandlerFactory extends SolrTestCaseJ4 {
     }
   }
 
+  @SuppressWarnings("unchecked")
+  public void testNodePreferenceRulesComparator() throws Exception {
+    List<Replica> replicas = new ArrayList<Replica>();
+    replicas.add(
+      new Replica(
+        "node1",
+        map(
+          ZkStateReader.BASE_URL_PROP, "http://host1:8983/solr",
+          ZkStateReader.NODE_NAME_PROP, "node1",
+          ZkStateReader.CORE_NAME_PROP, "collection1",
+          ZkStateReader.REPLICA_TYPE, "NRT"
+        )
+      )
+    );
+    replicas.add(
+      new Replica(
+        "node2",
+        map(
+          ZkStateReader.BASE_URL_PROP, "http://host2:8983/solr",
+          ZkStateReader.NODE_NAME_PROP, "node2",
+          ZkStateReader.CORE_NAME_PROP, "collection1",
+          ZkStateReader.REPLICA_TYPE, "TLOG"
+        )
+      )
+    );
+    replicas.add(
+      new Replica(
+        "node3",
+        map(
+          ZkStateReader.BASE_URL_PROP, "http://host2_2:8983/solr",
+          ZkStateReader.NODE_NAME_PROP, "node3",
+          ZkStateReader.CORE_NAME_PROP, "collection1",
+          ZkStateReader.REPLICA_TYPE, "PULL"
+        )
+      )
+    );
+
+    // Simple replica type rule
+    List<String> rules = StrUtils.splitSmart(
+      ShardParams.SHARDS_PREFERENCE_REPLICA_TYPE + ":NRT," + 
+      ShardParams.SHARDS_PREFERENCE_REPLICA_TYPE + ":TLOG", 
+      ','
+    );
+    HttpShardHandlerFactory.NodePreferenceRulesComparator comparator = 
+      new HttpShardHandlerFactory.NodePreferenceRulesComparator(rules, null);
+    replicas.sort(comparator);
+    assertEquals("node1", replicas.get(0).getNodeName());
+    assertEquals("node2", replicas.get(1).getNodeName());
+
+    // Another simple replica type rule
+    rules = StrUtils.splitSmart(
+      ShardParams.SHARDS_PREFERENCE_REPLICA_TYPE + ":TLOG," + 
+      ShardParams.SHARDS_PREFERENCE_REPLICA_TYPE + ":NRT", 
+      ','
+    );
+    comparator = new HttpShardHandlerFactory.NodePreferenceRulesComparator(rules, null);
+    replicas.sort(comparator);
+    assertEquals("node2", replicas.get(0).getNodeName());
+    assertEquals("node1", replicas.get(1).getNodeName());
+
+    // replicaLocation rule
+    rules = StrUtils.splitSmart(ShardParams.SHARDS_PREFERENCE_REPLICA_LOCATION + ":http://host2:8983", ',');
+    comparator = new HttpShardHandlerFactory.NodePreferenceRulesComparator(rules, null);
+    replicas.sort(comparator);
+    assertEquals("node2", replicas.get(0).getNodeName());
+    assertEquals("node1", replicas.get(1).getNodeName());
+
+    // Add a replica so that sorting by replicaType:TLOG can cause a tie
+    replicas.add(
+      new Replica(
+        "node4",
+        map(
+          ZkStateReader.BASE_URL_PROP, "http://host2_2:8983/solr",
+          ZkStateReader.NODE_NAME_PROP, "node4",
+          ZkStateReader.CORE_NAME_PROP, "collection1",
+          ZkStateReader.REPLICA_TYPE, "TLOG"
+        )
+      )
+    );
+
+    // replicaType and replicaLocation combined rule
+    rules = StrUtils.splitSmart(
+      ShardParams.SHARDS_PREFERENCE_REPLICA_TYPE + ":NRT," + 
+      ShardParams.SHARDS_PREFERENCE_REPLICA_TYPE + ":TLOG," + 
+      ShardParams.SHARDS_PREFERENCE_REPLICA_LOCATION + ":http://host2_2", 
+      ','
+    );
+    comparator = new HttpShardHandlerFactory.NodePreferenceRulesComparator(rules, null);
+    replicas.sort(comparator);
+    assertEquals("node1", replicas.get(0).getNodeName());
+    assertEquals("node4", replicas.get(1).getNodeName());
+    assertEquals("node2", replicas.get(2).getNodeName());
+    assertEquals("node3", replicas.get(3).getNodeName());
+
+    // Bad rule
+    rules = StrUtils.splitSmart(ShardParams.SHARDS_PREFERENCE_REPLICA_TYPE, ',');
+    try {
+      comparator = new HttpShardHandlerFactory.NodePreferenceRulesComparator(rules, null);
+      replicas.sort(comparator);
+      fail();
+    } catch (IllegalArgumentException e) {
+      assertEquals("Invalid shards.preference rule: " + ShardParams.SHARDS_PREFERENCE_REPLICA_TYPE, e.getMessage());
+    }
+
+    // Unknown rule
+    rules = StrUtils.splitSmart("badRule:test", ',');
+    try {
+      comparator = new HttpShardHandlerFactory.NodePreferenceRulesComparator(rules, null);
+      replicas.sort(comparator);
+      fail();
+    } catch (IllegalArgumentException e) {
+      assertEquals("Invalid shards.preference type: badRule", e.getMessage());
+    }
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8927d469/solr/solr-ref-guide/src/distributed-requests.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/distributed-requests.adoc b/solr/solr-ref-guide/src/distributed-requests.adoc
index 096f632..b199c7e 100644
--- a/solr/solr-ref-guide/src/distributed-requests.adoc
+++ b/solr/solr-ref-guide/src/distributed-requests.adoc
@@ -138,6 +138,46 @@ For example, a deadlock might occur in the case of two shards, each with just a
 
 == preferLocalShards Parameter
 
+Deprecated, use `shards.preference=replica.location:local` instead (see below).
+
 Solr allows you to pass an optional boolean parameter named `preferLocalShards` to indicate that a distributed query should prefer local replicas of a shard when available. In other words, if a query includes `preferLocalShards=true`, then the query controller will look for local replicas to service the query instead of selecting replicas at random from across the cluster. This is useful when a query requests many fields or large fields to be returned per document because it avoids moving large amounts of data over the network when it is available locally. In addition, this feature can be useful for minimizing the impact of a problematic replica with degraded performance, as it reduces the likelihood that the degraded replica will be hit by other healthy replicas.
 
 Lastly, it follows that the value of this feature diminishes as the number of shards in a collection increases because the query controller will have to direct the query to non-local replicas for most of the shards. In other words, this feature is mostly useful for optimizing queries directed towards collections with a small number of shards and many replicas. Also, this option should only be used if you are load balancing requests across all nodes that host replicas for the collection you are querying, as Solr's CloudSolrClient will do. If not load-balancing, this feature can introduce a hotspot in the cluster since queries won't be evenly distributed across the cluster.
+
+== shards.preference Parameter
+
+Solr allows you to pass an optional string parameter named `shards.preference` to indicate that a distributed query should sort the available replicas in the given order of precedence within each shard. The syntax is: `shards.preference=property:value`. The order of the properties and their values is significant: the first one is the primary sort, the second one is the secondary sort, and so on.
+
+IMPORTANT: `shards.preference` only works for distributed queries, i.e., queries targeting multiple shards. It is not yet implemented for single-shard scenarios.
+
+The properties that can be specified are as follows:
+
+`replica.type`::
+One or more replica types that are preferred. Any combination of PULL, TLOG and NRT is allowed.
+
+`replica.location`::
+One or more replica locations that are preferred. A location starts with `http://hostname:port`. Matching is done on the given string as a prefix, so it is possible to, for example, leave out the port. `local` may be used as a special value to denote any local replica running on the same Solr instance as the one handling the query. This is useful when a query requests many fields or large fields to be returned per document because it avoids moving large amounts of data over the network when it is available locally. In addition, this feature can be useful for minimizing the impact of a problematic replica with degraded performance, as it reduces the likelihood that the degraded replica will be hit by requests from other, healthy nodes.
+
+The value of `replica.location:local` diminishes as the number of shards (that have no locally-available replicas) in a collection increases because the query controller will have to direct the query to non-local replicas for most of the shards. In other words, this feature is mostly useful for optimizing queries directed towards collections with a small number of shards and many replicas. Also, this option should only be used if you are load balancing requests across all nodes that host replicas for the collection you are querying, as Solr's CloudSolrClient will do. If not load-balancing, this feature can introduce a hotspot in the cluster since queries won't be evenly distributed across the cluster.
+
+Examples:
+
+ * Prefer PULL replicas:
+   `shards.preference=replica.type:PULL`
+
+ * Prefer PULL replicas, or TLOG replicas if PULL replicas not available:
+   `shards.preference=replica.type:PULL,replica.type:TLOG`   
+
+ * Prefer any local replicas:
+   `shards.preference=replica.location:local`
+
+ * Prefer any replicas on a host called "server1" with "server2" as the secondary option:
+   `shards.preference=replica.location:http://server1,replica.location:http://server2`
+
+ * Prefer PULL replicas if available, otherwise TLOG replicas, and local ones among those:
+   `shards.preference=replica.type:PULL,replica.type:TLOG,replica.location:local`
+
+ * Prefer local replicas, and among them PULL replicas when available, TLOG otherwise:
+   `shards.preference=replica.location:local,replica.type:PULL,replica.type:TLOG`
+
+Note that if you provide the settings in a query string, they need to be properly URL-encoded.
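
For SolrJ users, here is a minimal sketch of setting the new parameter via the ShardParams constants added in this patch. It is illustrative only; the ZooKeeper address ("localhost:9983") and collection name ("techproducts") are placeholders and not part of the patch:

    // Hedged sketch: assumes a running SolrCloud cluster reachable at localhost:9983
    // and an existing collection named "techproducts" (both are placeholder values).
    import java.util.Collections;
    import java.util.Optional;

    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.impl.CloudSolrClient;
    import org.apache.solr.client.solrj.response.QueryResponse;
    import org.apache.solr.common.params.ShardParams;

    public class ShardsPreferenceExample {
      public static void main(String[] args) throws Exception {
        try (CloudSolrClient client = new CloudSolrClient.Builder(
            Collections.singletonList("localhost:9983"), Optional.empty()).build()) {
          SolrQuery query = new SolrQuery("*:*");
          // Primary preference: PULL replicas; then TLOG; among those, prefer local replicas.
          query.set(ShardParams.SHARDS_PREFERENCE,
              ShardParams.SHARDS_PREFERENCE_REPLICA_TYPE + ":PULL,"
                  + ShardParams.SHARDS_PREFERENCE_REPLICA_TYPE + ":TLOG,"
                  + ShardParams.SHARDS_PREFERENCE_REPLICA_LOCATION + ":" + ShardParams.REPLICA_LOCAL);
          QueryResponse rsp = client.query("techproducts", query);
          System.out.println("numFound=" + rsp.getResults().getNumFound());
        }
      }
    }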

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8927d469/solr/solr-ref-guide/src/shards-and-indexing-data-in-solrcloud.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/shards-and-indexing-data-in-solrcloud.adoc b/solr/solr-ref-guide/src/shards-and-indexing-data-in-solrcloud.adoc
index 81c6f86..b899c5f 100644
--- a/solr/solr-ref-guide/src/shards-and-indexing-data-in-solrcloud.adoc
+++ b/solr/solr-ref-guide/src/shards-and-indexing-data-in-solrcloud.adoc
@@ -86,6 +86,10 @@ If the PULL replica cannot connect to ZooKeeper, it would be removed from the cl
 
 If the PULL replica dies or is unreachable for any other reason, it won't be query-able. When it rejoins the cluster, it would replicate from the leader and when that is complete, it would be ready to serve queries again.
 
+=== Queries with Preferred Replica Types
+
+By default all replicas serve queries. See the section <<distributed-requests.adoc#shards-preference-parameter,shards.preference Parameter>> for details on how to indicate preferred replica types for queries.
+
 == Document Routing
 
 Solr offers the ability to specify the router implementation used by a collection by specifying the `router.name` parameter when <<collections-api.adoc#create,creating your collection>>.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8927d469/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
index 1645ff2..a67f433 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
@@ -269,6 +269,7 @@ public interface CommonParams {
 
   /**
    * When querying a node, prefer local node's cores for distributed queries.
+   * @deprecated Use {@code ShardParams.SHARDS_PREFERENCE}
    */
   String PREFER_LOCAL_SHARDS = "preferLocalShards";
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8927d469/solr/solrj/src/java/org/apache/solr/common/params/ShardParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/ShardParams.java b/solr/solrj/src/java/org/apache/solr/common/params/ShardParams.java
index cbc33f4..567e963 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/ShardParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/ShardParams.java
@@ -52,6 +52,18 @@ public interface ShardParams {
   /** query purpose for shard requests */
   String SHARDS_PURPOSE = "shards.purpose";
 
+  /** Shards sorting rules */
+  String SHARDS_PREFERENCE = "shards.preference";
+
+  /** Replica type sort rule */
+  String SHARDS_PREFERENCE_REPLICA_TYPE = "replica.type";
+
+  /** Replica location sort rule */
+  String SHARDS_PREFERENCE_REPLICA_LOCATION = "replica.location";
+
+  /** Value denoting local replicas */
+  String REPLICA_LOCAL = "local";
+
   String _ROUTE_ = "_route_";
 
   /** Force a single-pass distributed query? (true/false) */

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8927d469/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientTest.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientTest.java
index e54f9ad..bc4bd8c 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientTest.java
@@ -416,18 +416,24 @@ public class CloudSolrClientTest extends SolrCloudTestCase {
         .commit(getRandomClient(), collectionName);
 
     // Run the actual test for 'preferLocalShards'
-    queryWithPreferLocalShards(getRandomClient(), true, collectionName);
+    queryWithShardsPreferenceRules(getRandomClient(), false, collectionName);
+    queryWithShardsPreferenceRules(getRandomClient(), true, collectionName);
   }
 
-  private void queryWithPreferLocalShards(CloudSolrClient cloudClient,
-                                          boolean preferLocalShards,
+  @SuppressWarnings("deprecation")
+  private void queryWithShardsPreferenceRules(CloudSolrClient cloudClient,
+                                          boolean useShardsPreference,
                                           String collectionName)
       throws Exception
   {
     SolrQuery qRequest = new SolrQuery("*:*");
 
     ModifiableSolrParams qParams = new ModifiableSolrParams();
-    qParams.add(CommonParams.PREFER_LOCAL_SHARDS, Boolean.toString(preferLocalShards));
+    if (useShardsPreference) {
+      qParams.add(ShardParams.SHARDS_PREFERENCE, ShardParams.SHARDS_PREFERENCE_REPLICA_LOCATION + ":" + ShardParams.REPLICA_LOCAL);
+    } else {
+      qParams.add(CommonParams.PREFER_LOCAL_SHARDS, "true");
+    }
     qParams.add(ShardParams.SHARDS_INFO, "true");
     qRequest.add(qParams);
 
@@ -454,17 +460,15 @@ public class CloudSolrClientTest extends SolrCloudTestCase {
     log.info("Shards giving the response: " + Arrays.toString(shardAddresses.toArray()));
 
     // Make sure the distributed queries were directed to a single node only
-    if (preferLocalShards) {
-      Set<Integer> ports = new HashSet<Integer>();
-      for (String shardAddr: shardAddresses) {
-        URL url = new URL (shardAddr);
-        ports.add(url.getPort());
-      }
-
-      // This assertion would hold true as long as every shard has a core on each node
-      assertTrue ("Response was not received from shards on a single node",
-          shardAddresses.size() > 1 && ports.size()==1);
+    Set<Integer> ports = new HashSet<Integer>();
+    for (String shardAddr: shardAddresses) {
+      URL url = new URL (shardAddr);
+      ports.add(url.getPort());
     }
+
+    // This assertion would hold true as long as every shard has a core on each node
+    assertTrue ("Response was not received from shards on a single node",
+        shardAddresses.size() > 1 && ports.size()==1);
   }
 
   private Long getNumRequests(String baseUrl, String collectionName) throws
@@ -844,4 +848,115 @@ public class CloudSolrClientTest extends SolrCloudTestCase {
     }
   }
 
+  /**
+   * Tests if specifying replica type preferences ('shards.preference=replica.type:...')
+   * in the query params limits the distributed query to replicas of the preferred types
+   */
+  @Test
+  public void preferReplicaTypesTest() throws Exception {
+
+    String collectionName = "replicaTypesTestColl";
+
+    int liveNodes = cluster.getJettySolrRunners().size();
+
+    // For these tests we need to have multiple replica types.
+    // Hence the below configuration for our collection
+    CollectionAdminRequest.createCollection(collectionName, "conf", liveNodes, 1, 1, Math.max(1, liveNodes - 2))
+        .setMaxShardsPerNode(liveNodes)
+        .processAndWait(cluster.getSolrClient(), TIMEOUT);
+    AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
+
+    // Add some new documents
+    new UpdateRequest()
+        .add(id, "0", "a_t", "hello1")
+        .add(id, "2", "a_t", "hello2")
+        .add(id, "3", "a_t", "hello2")
+        .commit(getRandomClient(), collectionName);
+
+    // Run the actual tests for 'shards.preference=replica.type:*'
+    queryWithPreferReplicaTypes(getRandomClient(), "PULL", false, collectionName);
+    queryWithPreferReplicaTypes(getRandomClient(), "PULL|TLOG", false, collectionName);
+    queryWithPreferReplicaTypes(getRandomClient(), "TLOG", false, collectionName);
+    queryWithPreferReplicaTypes(getRandomClient(), "TLOG|PULL", false, collectionName);
+    queryWithPreferReplicaTypes(getRandomClient(), "NRT", false, collectionName);
+    queryWithPreferReplicaTypes(getRandomClient(), "NRT|PULL", false, collectionName);
+    // Test to verify that preferLocalShards=true doesn't break this
+    queryWithPreferReplicaTypes(getRandomClient(), "PULL", true, collectionName);
+    queryWithPreferReplicaTypes(getRandomClient(), "PULL|TLOG", true, collectionName);
+    queryWithPreferReplicaTypes(getRandomClient(), "TLOG", true, collectionName);
+    queryWithPreferReplicaTypes(getRandomClient(), "TLOG|PULL", true, collectionName);
+    queryWithPreferReplicaTypes(getRandomClient(), "NRT", false, collectionName);
+    queryWithPreferReplicaTypes(getRandomClient(), "NRT|PULL", true, collectionName);
+  }
+
+  private void queryWithPreferReplicaTypes(CloudSolrClient cloudClient,
+                                           String preferReplicaTypes,
+                                           boolean preferLocalShards,
+                                           String collectionName)
+      throws Exception
+  {
+    SolrQuery qRequest = new SolrQuery("*:*");
+    ModifiableSolrParams qParams = new ModifiableSolrParams();
+
+    final List<String> preferredTypes = Arrays.asList(preferReplicaTypes.split("\\|"));
+    StringBuilder rule = new StringBuilder();
+    preferredTypes.forEach(type -> {
+      if (rule.length() != 0) {
+        rule.append(',');
+      }
+      rule.append(ShardParams.SHARDS_PREFERENCE_REPLICA_TYPE);
+      rule.append(':');
+      rule.append(type);
+    });
+    if (preferLocalShards) {
+      if (rule.length() != 0) {
+        rule.append(',');
+      }
+      rule.append(ShardParams.SHARDS_PREFERENCE_REPLICA_LOCATION);
+      rule.append(":local");
+    }
+    qParams.add(ShardParams.SHARDS_PREFERENCE, rule.toString());  
+    qParams.add(ShardParams.SHARDS_INFO, "true");
+    qRequest.add(qParams);
+
+    // CloudSolrClient sends the request to some node.
+    // Since all the nodes host cores from all shards, the distributed
+    // query formed by this node should select cores of the preferred
+    // replica type (and prefer local ones when requested)
+    QueryResponse qResponse = cloudClient.query(collectionName, qRequest);
+
+    Object shardsInfo = qResponse.getResponse().get(ShardParams.SHARDS_INFO);
+    assertNotNull("Unable to obtain "+ShardParams.SHARDS_INFO, shardsInfo);
+
+    Map<String, String> replicaTypeMap = new HashMap<String, String>();
+    DocCollection collection = getCollectionState(collectionName);
+    for (Slice slice : collection.getSlices()) {
+      for (Replica replica : slice.getReplicas()) {
+        String coreUrl = replica.getCoreUrl();
+        // It seems replica reports its core URL with a trailing slash while shard
+        // info returned from the query doesn't. Oh well.
+        if (coreUrl.endsWith("/")) {
+          coreUrl = coreUrl.substring(0, coreUrl.length() - 1);
+        }
+        replicaTypeMap.put(coreUrl, replica.getType().toString());
+      }
+    }
+
+    // Iterate over shards-info and check that replicas of correct type responded
+    SimpleOrderedMap<?> shardsInfoMap = (SimpleOrderedMap<?>)shardsInfo;
+    Iterator<Map.Entry<String, ?>> itr = shardsInfoMap.asMap(100).entrySet().iterator();
+    List<String> shardAddresses = new ArrayList<String>();
+    while (itr.hasNext()) {
+      Map.Entry<String, ?> e = itr.next();
+      assertTrue("Did not find map-type value in "+ShardParams.SHARDS_INFO, e.getValue() instanceof Map);
+      String shardAddress = (String)((Map)e.getValue()).get("shardAddress");
+      assertNotNull(ShardParams.SHARDS_INFO+" did not return 'shardAddress' parameter", shardAddress);
+      assertTrue(replicaTypeMap.containsKey(shardAddress));
+      assertTrue(preferredTypes.indexOf(replicaTypeMap.get(shardAddress)) == 0);
+      shardAddresses.add(shardAddress);
+    }
+    assertTrue("No responses", shardAddresses.size() > 0);
+    log.info("Shards giving the response: " + Arrays.toString(shardAddresses.toArray()));
+  }
+
 }


[20/46] lucene-solr:jira/solr-11833: Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr

Posted by ab...@apache.org.
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/71936db9
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/71936db9
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/71936db9

Branch: refs/heads/jira/solr-11833
Commit: 71936db9f073f02264398b97b1893c37e3d89f54
Parents: 1d201f3 0014f3a
Author: Karl Wright <Da...@gmail.com>
Authored: Thu Apr 12 23:32:56 2018 -0400
Committer: Karl Wright <Da...@gmail.com>
Committed: Thu Apr 12 23:32:56 2018 -0400

----------------------------------------------------------------------
 .../apache/lucene/index/TestIndexSorting.java   |  1 +
 solr/CHANGES.txt                                |  2 ++
 .../org/apache/solr/TestDistributedSearch.java  |  1 +
 .../org/apache/solr/cloud/AddReplicaTest.java   |  1 +
 .../apache/solr/cloud/AliasIntegrationTest.java |  1 +
 .../cloud/ChaosMonkeyNothingIsSafeTest.java     |  1 +
 .../solr/cloud/CreateRoutedAliasTest.java       |  4 ++++
 .../apache/solr/cloud/DeleteReplicaTest.java    |  1 +
 .../apache/solr/cloud/TestCloudRecovery.java    |  1 +
 .../TestLeaderInitiatedRecoveryThread.java      |  3 +++
 .../solr/cloud/TestStressInPlaceUpdates.java    |  1 +
 .../TestCollectionsAPIViaSolrCloudCluster.java  |  1 +
 .../autoscaling/ComputePlanActionTest.java      |  1 +
 .../cloud/autoscaling/ScheduledTriggerTest.java |  1 +
 .../autoscaling/sim/TestTriggerIntegration.java |  1 +
 .../solr/cloud/cdcr/CdcrBidirectionalTest.java  | 24 +++++++++++---------
 .../admin/AutoscalingHistoryHandlerTest.java    |  3 +--
 .../cloud/TestCollectionStateWatchers.java      |  1 +
 18 files changed, 36 insertions(+), 13 deletions(-)
----------------------------------------------------------------------



[07/46] lucene-solr:jira/solr-11833: Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr

Posted by ab...@apache.org.
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/017f59ba
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/017f59ba
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/017f59ba

Branch: refs/heads/jira/solr-11833
Commit: 017f59bae5b6ee883d5284285713d26c09629de5
Parents: 0b1e8ef 11d54b0
Author: Karl Wright <Da...@gmail.com>
Authored: Thu Apr 12 06:39:56 2018 -0400
Committer: Karl Wright <Da...@gmail.com>
Committed: Thu Apr 12 06:39:56 2018 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   7 +
 .../org/apache/solr/cloud/ElectionContext.java  |   3 +-
 .../org/apache/solr/cloud/RecoveryStrategy.java |   2 +-
 .../component/HttpShardHandlerFactory.java      | 165 +++++++++++++------
 .../solr/response/GraphMLResponseWriter.java    |  14 +-
 .../solr/cloud/LeaderVoteWaitTimeoutTest.java   |  19 ++-
 .../component/TestHttpShardHandlerFactory.java  | 119 +++++++++++++
 .../src/distributed-requests.adoc               |  40 +++++
 .../shards-and-indexing-data-in-solrcloud.adoc  |   4 +
 .../apache/solr/common/params/CommonParams.java |   1 +
 .../apache/solr/common/params/ShardParams.java  |  12 ++
 .../client/solrj/impl/CloudSolrClientTest.java  | 143 ++++++++++++++--
 12 files changed, 449 insertions(+), 80 deletions(-)
----------------------------------------------------------------------



[09/46] lucene-solr:jira/solr-11833: Fix TestLRUQueryCache#testDocValuesUpdatesDontBreakCache

Posted by ab...@apache.org.
Fix TestLRUQueryCache#testDocValuesUpdatesDontBreakCache

This test was flagged as BadApple and referred to SOLR-12028.
The test stated clearly that the usage of newSearcher(reader) is
dangerous since it might add concurrency to the test. This commit
respects that comment and removes all subsequent usage of
newSearcher(...).


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/24f10c48
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/24f10c48
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/24f10c48

Branch: refs/heads/jira/solr-11833
Commit: 24f10c48dc56de71d72e8bbdcc6a7d9a13f5edf6
Parents: 9cb07c7
Author: Simon Willnauer <si...@apache.org>
Authored: Thu Apr 12 12:12:21 2018 +0200
Committer: Simon Willnauer <si...@apache.org>
Committed: Thu Apr 12 12:44:34 2018 +0200

----------------------------------------------------------------------
 .../test/org/apache/lucene/search/TestLRUQueryCache.java    | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/24f10c48/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
index b2645ab..74066ca 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
@@ -1480,16 +1480,15 @@ public class TestLRUQueryCache extends LuceneTestCase {
   }
 
   @Test
-  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
   public void testDocValuesUpdatesDontBreakCache() throws IOException {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
-    //RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
     IndexWriter w = new IndexWriter(dir, iwc);
     w.addDocument(new Document());
     w.commit();
     DirectoryReader reader = DirectoryReader.open(w);
 
+    // IMPORTANT:
     // Don't use newSearcher(), because that will sometimes use an ExecutorService, and
     // we need to be single threaded to ensure that LRUQueryCache doesn't skip the cache
     // due to thread contention
@@ -1511,7 +1510,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
     w.addDocument(doc);
     reader.close();
     reader = DirectoryReader.open(w);
-    searcher = newSearcher(reader);
+    searcher = new AssertingIndexSearcher(random(), reader); // no newSearcher(reader) - see comment above
     searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
     searcher.setQueryCache(cache);
 
@@ -1520,7 +1519,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
 
     reader.close();
     reader = DirectoryReader.open(w);
-    searcher = newSearcher(reader);
+    searcher = new AssertingIndexSearcher(random(), reader); // no newSearcher(reader) - see comment above
     searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
     searcher.setQueryCache(cache);
 
@@ -1531,7 +1530,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
     w.updateNumericDocValue(new Term("text", "text"), "field", 2l);
     reader.close();
     reader = DirectoryReader.open(w);
-    searcher = newSearcher(reader);
+    searcher = new AssertingIndexSearcher(random(), reader); // no newSearcher(reader) - see comment above
     searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
     searcher.setQueryCache(cache);
 


[23/46] lucene-solr:jira/solr-11833: LUCENE-8233: Never reset initialized DV gen

Posted by ab...@apache.org.
LUCENE-8233: Never reset initialized DV gen

We drop changes after we finish a merge; this also reset
the DV generation that PendingSoftDeletes was initialized on, causing
assertions to trip if releasing the reader was writing DVs to disk.
This change stops resetting the DV generation so the assertions
hold, which required keeping the pending change count on PendingSoftDeletes.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d5b68576
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d5b68576
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d5b68576

Branch: refs/heads/jira/solr-11833
Commit: d5b68576034ee9ee936e1e712639212a3a04f73f
Parents: f41e7c4
Author: Simon Willnauer <si...@apache.org>
Authored: Fri Apr 13 09:30:32 2018 +0200
Committer: Simon Willnauer <si...@apache.org>
Committed: Fri Apr 13 09:34:33 2018 +0200

----------------------------------------------------------------------
 .../src/java/org/apache/lucene/index/PendingDeletes.java     | 4 ++--
 .../src/java/org/apache/lucene/index/PendingSoftDeletes.java | 8 ++++----
 .../src/java/org/apache/lucene/index/ReadersAndUpdates.java  | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d5b68576/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
index fca42b4..52d06e8 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
@@ -166,7 +166,7 @@ class PendingDeletes {
   /**
    * Resets the pending docs
    */
-  void reset() {
+  void dropChanges() {
     pendingDeleteCount = 0;
   }
 
@@ -223,7 +223,7 @@ class PendingDeletes {
     // (successfully written) del docs:
     info.advanceDelGen();
     info.setDelCount(info.getDelCount() + pendingDeleteCount);
-    reset();
+    dropChanges();
     return true;
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d5b68576/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
index 0d7852b..4c3db48 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
@@ -88,10 +88,10 @@ final class PendingSoftDeletes extends PendingDeletes {
   }
 
   @Override
-  void reset() {
-    dvGeneration = -2;
-    super.reset();
-    hardDeletes.reset();
+  void dropChanges() {
+    // don't reset anything here - this is called after a merge (successful or not) to prevent
+    // rewriting the deleted docs to disk. we only pass it on and reset the number of pending deletes
+    hardDeletes.dropChanges();
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d5b68576/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
index 6dc8864..b236bea 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
@@ -287,7 +287,7 @@ final class ReadersAndUpdates {
     // is running, by now we have carried forward those
     // deletes onto the newly merged segment, so we can
     // discard them on the sub-readers:
-    pendingDeletes.reset();
+    pendingDeletes.dropChanges();
     dropMergingUpdates();
   }
 


[17/46] lucene-solr:jira/solr-11833: SOLR-12150: Fix a test bug in CdcrBidirectionalTest.testBiDir

Posted by ab...@apache.org.
SOLR-12150: Fix a test bug in CdcrBidirectionalTest.testBiDir


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2a2a0b6a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2a2a0b6a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2a2a0b6a

Branch: refs/heads/jira/solr-11833
Commit: 2a2a0b6acd527ae219b66d22b67c2c7b37273bf2
Parents: 3d5f2f2
Author: Varun Thacker <va...@apache.org>
Authored: Thu Apr 12 15:25:11 2018 -0700
Committer: Varun Thacker <va...@apache.org>
Committed: Thu Apr 12 15:35:34 2018 -0700

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  2 ++
 .../solr/cloud/cdcr/CdcrBidirectionalTest.java  | 24 +++++++++++---------
 2 files changed, 15 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2a2a0b6a/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index c2e6da7..3194e3d 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -149,6 +149,8 @@ Bug Fixes
 
 * SOLR-12065: A successful restore collection should mark the shard state as active and not buffering
   (Rohit, Varun Thacker)
+
+* SOLR-12150: Fix a test bug in CdcrBidirectionalTest.testBiDir (Steve Rowe, Amrit Sarkar via Varun Thacker)
  
 Optimizations
 ----------------------

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2a2a0b6a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBidirectionalTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBidirectionalTest.java b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBidirectionalTest.java
index 4a7fae4..a11b9ac 100644
--- a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBidirectionalTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBidirectionalTest.java
@@ -32,7 +32,9 @@ import org.apache.solr.cloud.MiniSolrCloudCluster;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.handler.CdcrParams;
+import org.apache.solr.util.TimeOut;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -185,19 +187,20 @@ public class CdcrBidirectionalTest extends SolrTestCaseJ4 {
       // ATOMIC UPDATES
       req = new UpdateRequest();
       doc = new SolrInputDocument();
+      String atomicFieldName = "abc";
       ImmutableMap.of("", "");
       String atomicUpdateId = "cluster2_" + random().nextInt(numDocs_c2);
       doc.addField("id", atomicUpdateId);
       doc.addField("xyz", ImmutableMap.of("delete", ""));
-      doc.addField("abc", ImmutableMap.of("set", "ABC"));
+      doc.addField(atomicFieldName, ImmutableMap.of("set", "ABC"));
       req.add(doc);
       req.process(cluster2SolrClient);
       cluster2SolrClient.commit();
 
       String atomicQuery = "id:" + atomicUpdateId;
       response = cluster2SolrClient.query(new SolrQuery(atomicQuery));
-      assertEquals("cluster 2 wrong doc", "ABC", response.getResults().get(0).get("abc"));
-      assertEquals("cluster 1 wrong doc", "ABC", getDocFieldValue(cluster1SolrClient, atomicQuery, "ABC"));
+      assertEquals("cluster 2 wrong doc", "ABC", response.getResults().get(0).get(atomicFieldName));
+      assertEquals("cluster 1 wrong doc", "ABC", getDocFieldValue(cluster1SolrClient, atomicQuery, "ABC", atomicFieldName ));
 
 
       // logging cdcr clusters queue response
@@ -218,17 +221,16 @@ public class CdcrBidirectionalTest extends SolrTestCaseJ4 {
     }
   }
 
-  private String getDocFieldValue(CloudSolrClient clusterSolrClient, String query, String match) throws Exception {
-    long start = System.nanoTime();
-    QueryResponse response = null;
-    while (System.nanoTime() - start <= TimeUnit.NANOSECONDS.convert(120, TimeUnit.SECONDS)) {
+  private String getDocFieldValue(CloudSolrClient clusterSolrClient, String query, String match, String field) throws Exception {
+    TimeOut waitTimeOut = new TimeOut(30, TimeUnit.SECONDS, TimeSource.NANO_TIME);
+    while (!waitTimeOut.hasTimedOut()) {
       clusterSolrClient.commit();
-      response = clusterSolrClient.query(new SolrQuery(query));
-      if (match.equals(response.getResults().get(0).get("abc"))) {
-        break;
+      QueryResponse response = clusterSolrClient.query(new SolrQuery(query));
+      if (response.getResults().size() > 0 && match.equals(response.getResults().get(0).get(field))) {
+        return (String) response.getResults().get(0).get(field);
       }
       Thread.sleep(1000);
     }
-    return response != null ? (String) response.getResults().get(0).get("abc") : "";
+    return null;
   }
 }


[46/46] lucene-solr:jira/solr-11833: SOLR-11833: Add support for configurable actions and metrics, and improve cold ops calculation.

Posted by ab...@apache.org.
SOLR-11833: Add support for configurable actions and metrics, and improve cold ops calculation.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0546c5fc
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0546c5fc
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0546c5fc

Branch: refs/heads/jira/solr-11833
Commit: 0546c5fcee208f9c59503e71beb196ddf5a23da8
Parents: 5bbe689
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Mon Apr 16 19:18:43 2018 +0200
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Mon Apr 16 19:18:43 2018 +0200

----------------------------------------------------------------------
 .../cloud/autoscaling/SearchRateTrigger.java    | 140 +++++++---
 .../SearchRateTriggerIntegrationTest.java       | 259 ++++++++++++++++++-
 .../autoscaling/DeleteReplicaSuggester.java     |   4 +-
 .../client/solrj/cloud/autoscaling/Policy.java  |   1 +
 .../solrj/cloud/autoscaling/ReplicaInfo.java    |  18 ++
 5 files changed, 374 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0546c5fc/solr/core/src/java/org/apache/solr/cloud/autoscaling/SearchRateTrigger.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/SearchRateTrigger.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/SearchRateTrigger.java
index ecbee25..a653a14 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/SearchRateTrigger.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/SearchRateTrigger.java
@@ -16,6 +16,7 @@
  */
 package org.apache.solr.cloud.autoscaling;
 
+import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -34,7 +35,9 @@ import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.Suggester;
 import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.AutoScalingParams;
 import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.util.Pair;
@@ -50,6 +53,9 @@ import org.slf4j.LoggerFactory;
 public class SearchRateTrigger extends TriggerBase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
+  public static final String METRIC_PROP = "metric";
+  public static final String MAX_OPS_PROP = "maxOps";
+  public static final String MIN_REPLICAS_PROP = "minReplicas";
   public static final String ABOVE_RATE_PROP = "aboveRate";
   public static final String BELOW_RATE_PROP = "belowRate";
   public static final String ABOVE_OP_PROP = "aboveOp";
@@ -66,7 +72,12 @@ public class SearchRateTrigger extends TriggerBase {
   public static final String COLD_SHARDS = "coldShards";
   public static final String COLD_REPLICAS = "coldReplicas";
 
-  private String handler;
+  public static final int DEFAULT_MAX_OPS = 3;
+  public static final String DEFAULT_METRIC = "QUERY./select.requestTimes:1minRate";
+
+  private String metric;
+  private int maxOps;
+  private Integer minReplicas = null;
   private String collection;
   private String shard;
   private String node;
@@ -85,10 +96,17 @@ public class SearchRateTrigger extends TriggerBase {
     this.state.put("lastNodeEvent", lastNodeEvent);
     this.state.put("lastShardEvent", lastShardEvent);
     this.state.put("lastReplicaEvent", lastReplicaEvent);
-    TriggerUtils.requiredProperties(requiredProperties, validProperties);
     TriggerUtils.validProperties(validProperties,
         AutoScalingParams.COLLECTION, AutoScalingParams.SHARD, AutoScalingParams.NODE,
-        AutoScalingParams.HANDLER, ABOVE_RATE_PROP, BELOW_RATE_PROP);
+        METRIC_PROP,
+        MAX_OPS_PROP,
+        MIN_REPLICAS_PROP,
+        ABOVE_OP_PROP,
+        BELOW_OP_PROP,
+        ABOVE_NODE_OP_PROP,
+        BELOW_NODE_OP_PROP,
+        ABOVE_RATE_PROP,
+        BELOW_RATE_PROP);
   }
 
   @Override
@@ -101,7 +119,23 @@ public class SearchRateTrigger extends TriggerBase {
       throw new TriggerValidationException(name, AutoScalingParams.SHARD, "When 'shard' is other than #ANY then collection name must be also other than #ANY");
     }
     node = (String)properties.getOrDefault(AutoScalingParams.NODE, Policy.ANY);
-    handler = (String)properties.getOrDefault(AutoScalingParams.HANDLER, "/select");
+    metric = (String)properties.getOrDefault(METRIC_PROP, DEFAULT_METRIC);
+
+    String maxOpsStr = String.valueOf(properties.getOrDefault(MAX_OPS_PROP, DEFAULT_MAX_OPS));
+    try {
+      maxOps = Integer.parseInt(maxOpsStr);
+    } catch (Exception e) {
+      throw new TriggerValidationException(name, MAX_OPS_PROP, "invalid value '" + maxOpsStr + "': " + e.toString());
+    }
+
+    Object o = properties.get(MIN_REPLICAS_PROP);
+    if (o != null) {
+      try {
+        minReplicas = Integer.parseInt(o.toString());
+      } catch (Exception e) {
+        throw new TriggerValidationException(name, MIN_REPLICAS_PROP, "invalid value '" + o + "': " + e.toString());
+      }
+    }
 
     Object above = properties.get(ABOVE_RATE_PROP);
     Object below = properties.get(BELOW_RATE_PROP);
@@ -138,15 +172,21 @@ public class SearchRateTrigger extends TriggerBase {
     if (belowOp == null) {
       throw new TriggerValidationException(getName(), BELOW_OP_PROP, "unrecognized value: '" + belowOpStr + "'");
     }
-    aboveOpStr = String.valueOf(properties.getOrDefault(ABOVE_NODE_OP_PROP, CollectionParams.CollectionAction.MOVEREPLICA.toLower()));
-    belowOpStr = String.valueOf(properties.getOrDefault(BELOW_NODE_OP_PROP, CollectionParams.CollectionAction.DELETENODE.toLower()));
-    aboveNodeOp = CollectionParams.CollectionAction.get(aboveOpStr);
-    if (aboveNodeOp == null) {
-      throw new TriggerValidationException(getName(), ABOVE_NODE_OP_PROP, "unrecognized value: '" + aboveOpStr + "'");
+    Object aboveNodeObj = properties.get(ABOVE_NODE_OP_PROP);
+    Object belowNodeObj = properties.get(BELOW_NODE_OP_PROP);
+    if (aboveNodeObj != null) {
+      try {
+        aboveNodeOp = CollectionParams.CollectionAction.get(String.valueOf(aboveNodeObj));
+      } catch (Exception e) {
+        throw new TriggerValidationException(getName(), ABOVE_NODE_OP_PROP, "unrecognized value: '" + aboveNodeObj + "'");
+      }
     }
-    belowNodeOp = CollectionParams.CollectionAction.get(belowOpStr);
-    if (belowNodeOp == null) {
-      throw new TriggerValidationException(getName(), BELOW_NODE_OP_PROP, "unrecognized value: '" + belowOpStr + "'");
+    if (belowNodeObj != null) {
+      try {
+        belowNodeOp = CollectionParams.CollectionAction.get(String.valueOf(belowNodeObj));
+      } catch (Exception e) {
+        throw new TriggerValidationException(getName(), BELOW_NODE_OP_PROP, "unrecognized value: '" + belowNodeObj + "'");
+      }
     }
   }
 
@@ -215,6 +255,13 @@ public class SearchRateTrigger extends TriggerBase {
     // collection, shard, RF
     Map<String, Map<String, AtomicInteger>> searchableReplicationFactors = new HashMap<>();
 
+    ClusterState clusterState = null;
+    try {
+      clusterState = cloudManager.getClusterStateProvider().getClusterState();
+    } catch (IOException e) {
+      log.warn("Error getting ClusterState", e);
+      return;
+    }
     for (String node : cloudManager.getClusterStateProvider().getLiveNodes()) {
       Map<String, ReplicaInfo> metricTags = new HashMap<>();
       // coll, shard, replica
@@ -238,8 +285,7 @@ public class SearchRateTrigger extends TriggerBase {
               replicaName = replica.getName(); // which is actually coreNode name...
             }
             String registry = SolrCoreMetricManager.createRegistryName(true, coll, sh, replicaName, null);
-            String tag = "metrics:" + registry
-                + ":QUERY." + handler + ".requestTimes:1minRate";
+            String tag = "metrics:" + registry + ":" + metric;
             metricTags.put(tag, replica);
           });
         });
@@ -396,7 +442,7 @@ public class SearchRateTrigger extends TriggerBase {
     final List<TriggerEvent.Op> ops = new ArrayList<>();
 
     calculateHotOps(ops, searchableReplicationFactors, hotNodes, hotCollections, hotShards, hotReplicas);
-    calculateColdOps(ops, searchableReplicationFactors, coldNodes, coldCollections, coldShards, coldReplicas);
+    calculateColdOps(ops, clusterState, searchableReplicationFactors, coldNodes, coldCollections, coldShards, coldReplicas);
 
     if (ops.isEmpty()) {
       return;
@@ -432,9 +478,11 @@ public class SearchRateTrigger extends TriggerBase {
     // TODO: eventually we may want to commission a new node
     if (!hotNodes.isEmpty() && hotShards.isEmpty() && hotCollections.isEmpty() && hotReplicas.isEmpty()) {
       // move replicas around
-      hotNodes.forEach((n, r) -> {
-        ops.add(new TriggerEvent.Op(aboveNodeOp, Suggester.Hint.SRC_NODE, n));
-      });
+      if (aboveNodeOp != null) {
+        hotNodes.forEach((n, r) -> {
+          ops.add(new TriggerEvent.Op(aboveNodeOp, Suggester.Hint.SRC_NODE, n));
+        });
+      }
     } else {
       // add replicas
       Map<String, Map<String, List<Pair<String, String>>>> hints = new HashMap<>();
@@ -472,9 +520,9 @@ public class SearchRateTrigger extends TriggerBase {
     if (numReplicas < 1) {
       numReplicas = 1;
     }
-    // ... and at most 3 replicas
-    if (numReplicas > 3) {
-      numReplicas = 3;
+    // ... and at most maxOps replicas
+    if (numReplicas > maxOps) {
+      numReplicas = maxOps;
     }
     for (int i = 0; i < numReplicas; i++) {
       hints.add(new Pair(collection, shard));
@@ -482,22 +530,27 @@ public class SearchRateTrigger extends TriggerBase {
   }
 
   private void calculateColdOps(List<TriggerEvent.Op> ops,
+                                ClusterState clusterState,
                                 Map<String, Map<String, AtomicInteger>> searchableReplicationFactors,
                                 Map<String, Double> coldNodes,
                                 Map<String, Double> coldCollections,
                                 Map<String, Map<String, Double>> coldShards,
                                 List<ReplicaInfo> coldReplicas) {
     // COLD NODES:
-    // Unlike in case of hot nodes, if a node is cold then any monitored
+    // Unlike the case of hot nodes, if a node is cold then any monitored
     // collections / shards / replicas located on that node are cold, too.
     // HOWEVER, we check only non-pull replicas and only from selected collections / shards,
     // so deleting a cold node is dangerous because it may interfere with these
-    // non-monitored resources
-    /*
-    coldNodes.forEach((node, rate) -> {
-      ops.add(new TriggerEvent.Op(belowNodeOp, Suggester.Hint.SRC_NODE, node));
-    });
-    */
+    // non-monitored resources - this is the reason the default belowNodeOp is null / ignored.
+    //
+    // Also, note that due to the way activity is measured only nodes that contain any
+    // monitored resources are considered - there may be cold nodes in the cluster that don't
+    // belong to the monitored collections and they will be ignored.
+    if (belowNodeOp != null) {
+      coldNodes.forEach((node, rate) -> {
+        ops.add(new TriggerEvent.Op(belowNodeOp, Suggester.Hint.SRC_NODE, node));
+      });
+    }
 
     // COLD COLLECTIONS
     // Probably can't do anything reasonable about whole cold collections
@@ -509,8 +562,8 @@ public class SearchRateTrigger extends TriggerBase {
     // address this by deleting cold replicas
 
     // COLD REPLICAS:
-    // Remove cold replicas but only when there's at least one more searchable replica
-    // still available (additional non-searchable replicas may exist, too)
+    // Remove cold replicas but only when there's at least a minimum number of searchable
+    // replicas still available (additional non-searchable replicas may exist, too)
     Map<String, Map<String, List<ReplicaInfo>>> byCollectionByShard = new HashMap<>();
     coldReplicas.forEach(ri -> {
       byCollectionByShard.computeIfAbsent(ri.getCollection(), c -> new HashMap<>())
@@ -519,16 +572,33 @@ public class SearchRateTrigger extends TriggerBase {
     });
     byCollectionByShard.forEach((coll, shards) -> {
       shards.forEach((shard, replicas) -> {
-        // only delete if there's at least one searchable replica left
-        // again, use a simple proportional controller with a limiter
+        // only delete if there's at least minRF searchable replicas left
         int rf = searchableReplicationFactors.get(coll).get(shard).get();
-        if (rf > replicas.size()) {
-          // delete at most 3 replicas at a time
-          AtomicInteger limit = new AtomicInteger(3);
+        // we only really need a leader and we may be allowed to remove other replicas
+        int minRF = 1;
+        // but check the official RF and don't go below that
+        Integer RF = clusterState.getCollection(coll).getReplicationFactor();
+        if (RF != null) {
+          minRF = RF;
+        }
+        // unless minReplicas is set explicitly
+        if (minReplicas != null) {
+          minRF = minReplicas;
+        }
+        if (minRF < 1) {
+          minRF = 1;
+        }
+        if (rf > minRF) {
+          // delete at most maxOps replicas at a time
+          AtomicInteger limit = new AtomicInteger(Math.min(maxOps, rf - minRF));
           replicas.forEach(ri -> {
             if (limit.get() == 0) {
               return;
             }
+            // don't delete a leader
+            if (ri.getBool(ZkStateReader.LEADER_PROP, false)) {
+              return;
+            }
             TriggerEvent.Op op = new TriggerEvent.Op(belowOp,
                 Suggester.Hint.COLL_SHARD, new Pair<>(ri.getCollection(), ri.getShard()));
             op.addHint(Suggester.Hint.REPLICA, ri.getName());
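
Before the integration test diff below, a hedged illustration of how the new configurable properties above (metric, maxOps, minReplicas, and the now-optional aboveNodeOp/belowNodeOp) could appear in a set-trigger payload. The trigger name, collection, waitFor and rate values are illustrative only; the metric shown is the documented default (DEFAULT_METRIC), and the payload mirrors the ones used in SearchRateTriggerIntegrationTest:

    // Illustrative sketch only, in the style of the integration test below; not part of the patch.
    String setTriggerCommand = "{" +
        "'set-trigger' : {" +
        "'name' : 'search_rate_trigger'," +
        "'event' : 'searchRate'," +
        "'waitFor' : '10s'," +
        "'enabled' : true," +
        "'collection' : 'myCollection'," +
        "'metric' : 'QUERY./select.requestTimes:1minRate'," + // same value as DEFAULT_METRIC
        "'maxOps' : 3," +          // at most this many replicas added/removed per event
        "'minReplicas' : 2," +     // explicit lower bound; otherwise the collection RF is used
        "'aboveRate' : 1.0," +
        "'belowRate' : 0.1," +
        "'belowNodeOp' : 'none'," + // cold-node handling is optional; omit to ignore cold nodes
        "'actions' : [" +
        "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
        "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}" +
        "]" +
        "}}";
    // Posted via the helper used in the tests:
    // SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
    // NamedList<Object> response = solrClient.request(req);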

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0546c5fc/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerIntegrationTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerIntegrationTest.java
index c1412ab..370b23a 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/SearchRateTriggerIntegrationTest.java
@@ -22,10 +22,9 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import com.google.common.util.concurrent.AtomicDouble;
 import org.apache.lucene.util.LuceneTestCase;
@@ -37,12 +36,16 @@ import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
 import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.cloud.CloudTestUtils;
 import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrResourceLoader;
 import org.apache.solr.util.LogLevel;
+import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.slf4j.Logger;
@@ -56,16 +59,16 @@ import static org.apache.solr.cloud.autoscaling.TriggerIntegrationTest.timeSourc
  * Integration test for {@link SearchRateTrigger}
  */
 @LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.client.solrj.cloud.autoscaling=DEBUG")
-@LuceneTestCase.BadApple(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028")
+@LuceneTestCase.Slow
 public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
-  private static CountDownLatch triggerFiredLatch = new CountDownLatch(1);
   private static CountDownLatch listenerCreated = new CountDownLatch(1);
-  private static int waitForSeconds = 1;
-  private static Set<TriggerEvent> events = ConcurrentHashMap.newKeySet();
   private static Map<String, List<CapturedEvent>> listenerEvents = new HashMap<>();
-  static CountDownLatch finished = new CountDownLatch(1);
+  private static CountDownLatch finished = new CountDownLatch(1);
+  private static SolrCloudManager cloudManager;
+
+  private int waitForSeconds;
 
   @BeforeClass
   public static void setupCluster() throws Exception {
@@ -80,21 +83,36 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
     SolrClient solrClient = cluster.getSolrClient();
     NamedList<Object> response = solrClient.request(req);
     assertEquals(response.get("result").toString(), "success");
+    cloudManager = cluster.getJettySolrRunner(0).getCoreContainer().getZkController().getSolrCloudManager();
+  }
+
+  @Before
+  public void beforeTest() throws Exception {
+    cluster.deleteAllCollections();
+    finished = new CountDownLatch(1);
+    listenerEvents.clear();
+    waitForSeconds = 3 + random().nextInt(5);
   }
 
   @Test
   public void testAboveSearchRate() throws Exception {
     CloudSolrClient solrClient = cluster.getSolrClient();
-    String COLL1 = "collection1";
+    String COLL1 = "aboveRate_collection";
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(COLL1,
         "conf", 1, 2);
     create.process(solrClient);
+
+    CloudTestUtils.waitForState(cloudManager, COLL1, 20, TimeUnit.SECONDS,
+        CloudTestUtils.clusterShape(1, 2));
+
+    // the trigger is initially disabled so that we have the time to set up listeners
+    // and generate the traffic
     String setTriggerCommand = "{" +
         "'set-trigger' : {" +
         "'name' : 'search_rate_trigger'," +
         "'event' : 'searchRate'," +
         "'waitFor' : '" + waitForSeconds + "s'," +
-        "'enabled' : true," +
+        "'enabled' : false," +
         "'collection' : '" + COLL1 + "'," +
         "'aboveRate' : 1.0," +
         "'belowRate' : 0.1," +
@@ -134,13 +152,23 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
     response = solrClient.request(req);
     assertEquals(response.get("result").toString(), "success");
 
-    timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
-
     SolrParams query = params(CommonParams.Q, "*:*");
     for (int i = 0; i < 500; i++) {
       solrClient.query(COLL1, query);
     }
 
+    // enable the trigger
+    String resumeTriggerCommand = "{" +
+        "'resume-trigger' : {" +
+        "'name' : 'search_rate_trigger'" +
+        "}" +
+        "}";
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, resumeTriggerCommand);
+    response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
+
     boolean await = finished.await(20, TimeUnit.SECONDS);
     assertTrue("The trigger did not fire at all", await);
 
@@ -201,7 +229,216 @@ public class SearchRateTriggerIntegrationTest extends SolrCloudTestCase {
 
   @Test
   public void testBelowSearchRate() throws Exception {
+    CloudSolrClient solrClient = cluster.getSolrClient();
+    String COLL1 = "belowRate_collection";
+    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(COLL1,
+        "conf", 1, 2);
+    create.process(solrClient);
+    // add a couple of spare replicas above RF. Use different types to verify that only
+    // searchable replicas are considered
+    // these additional replicas will be placed on other nodes in the cluster
+    solrClient.request(CollectionAdminRequest.addReplicaToShard(COLL1, "shard1", Replica.Type.NRT));
+    solrClient.request(CollectionAdminRequest.addReplicaToShard(COLL1, "shard1", Replica.Type.TLOG));
+    solrClient.request(CollectionAdminRequest.addReplicaToShard(COLL1, "shard1", Replica.Type.PULL));
+
+    CloudTestUtils.waitForState(cloudManager, COLL1, 20, TimeUnit.SECONDS,
+        CloudTestUtils.clusterShape(1, 5));
+
+    String setTriggerCommand = "{" +
+        "'set-trigger' : {" +
+        "'name' : 'search_rate_trigger'," +
+        "'event' : 'searchRate'," +
+        "'waitFor' : '" + waitForSeconds + "s'," +
+        "'enabled' : false," +
+        "'collection' : '" + COLL1 + "'," +
+        "'aboveRate' : 1.0," +
+        "'belowRate' : 0.1," +
+        "'belowNodeOp' : 'none'," +
+        "'actions' : [" +
+        "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
+        "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}" +
+        "]" +
+        "}}";
+    SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+    NamedList<Object> response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    String setListenerCommand = "{" +
+        "'set-listener' : " +
+        "{" +
+        "'name' : 'srt'," +
+        "'trigger' : 'search_rate_trigger'," +
+        "'stage' : ['FAILED','SUCCEEDED']," +
+        "'afterAction': ['compute', 'execute']," +
+        "'class' : '" + CapturingTriggerListener.class.getName() + "'" +
+        "}" +
+        "}";
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, setListenerCommand);
+    response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    setListenerCommand = "{" +
+        "'set-listener' : " +
+        "{" +
+        "'name' : 'finished'," +
+        "'trigger' : 'search_rate_trigger'," +
+        "'stage' : ['SUCCEEDED']," +
+        "'class' : '" + FinishedProcessingListener.class.getName() + "'" +
+        "}" +
+        "}";
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, setListenerCommand);
+    response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
+
+    // enable the trigger
+    String resumeTriggerCommand = "{" +
+        "'resume-trigger' : {" +
+        "'name' : 'search_rate_trigger'" +
+        "}" +
+        "}";
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, resumeTriggerCommand);
+    response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
+
+    boolean await = finished.await(20, TimeUnit.SECONDS);
+    assertTrue("The trigger did not fire at all", await);
+
+    // suspend the trigger so that it doesn't fire again
+    // while we inspect the events captured so far
+    String suspendTriggerCommand = "{" +
+        "'suspend-trigger' : {" +
+        "'name' : 'search_rate_trigger'" +
+        "}" +
+        "}";
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, suspendTriggerCommand);
+    response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    timeSource.sleep(5000);
+
+    List<CapturedEvent> events = listenerEvents.get("srt");
+    assertEquals(events.toString(), 3, events.size());
+    CapturedEvent ev = events.get(0);
+    assertEquals(ev.toString(), "compute", ev.actionName);
+    List<TriggerEvent.Op> ops = (List<TriggerEvent.Op>)ev.event.getProperty(TriggerEvent.REQUESTED_OPS);
+    assertNotNull("there should be some requestedOps: " + ev.toString(), ops);
+    // 3 cold nodes, 2 cold replicas
+    assertEquals(ops.toString(), 5, ops.size());
+    AtomicInteger coldNodes = new AtomicInteger();
+    AtomicInteger coldReplicas = new AtomicInteger();
+    ops.forEach(op -> {
+      if (op.getAction().equals(CollectionParams.CollectionAction.NONE)) {
+        coldNodes.incrementAndGet();
+      } else if (op.getAction().equals(CollectionParams.CollectionAction.DELETEREPLICA)) {
+        coldReplicas.incrementAndGet();
+      } else {
+        fail("unexpected op: " + op);
+      }
+    });
+    assertEquals("cold nodes", 3, coldNodes.get());
+    assertEquals("cold replicas", 2, coldReplicas.get());
+
+    // now the collection should be back to RF = 2, with one additional PULL replica
+    CloudTestUtils.waitForState(cloudManager, COLL1, 20, TimeUnit.SECONDS,
+        CloudTestUtils.clusterShape(1, 3));
+
+    listenerEvents.clear();
+    finished = new CountDownLatch(1);
+
+    // resume trigger
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, resumeTriggerCommand);
+    response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    // there should be only coldNode ops now, and no coldReplica ops since searchable RF == collection RF
+    timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
+
+    await = finished.await(20, TimeUnit.SECONDS);
+    assertTrue("The trigger did not fire at all", await);
+
+    // suspend trigger
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, suspendTriggerCommand);
+    response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    timeSource.sleep(5000);
+
+    events = listenerEvents.get("srt");
+    assertEquals(events.toString(), 3, events.size());
+
+    ev = events.get(0);
+    assertEquals(ev.toString(), "compute", ev.actionName);
+    ops = (List<TriggerEvent.Op>)ev.event.getProperty(TriggerEvent.REQUESTED_OPS);
+    assertNotNull("there should be some requestedOps: " + ev.toString(), ops);
+    assertEquals(ops.toString(), 1, ops.size());
+    assertEquals(ops.toString(), CollectionParams.CollectionAction.NONE, ops.get(0).getAction());
+
+    listenerEvents.clear();
+    finished = new CountDownLatch(1);
+
+    // now allow single replicas
+    setTriggerCommand = "{" +
+        "'set-trigger' : {" +
+        "'name' : 'search_rate_trigger'," +
+        "'event' : 'searchRate'," +
+        "'waitFor' : '" + waitForSeconds + "s'," +
+        "'enabled' : true," +
+        "'collection' : '" + COLL1 + "'," +
+        "'aboveRate' : 1.0," +
+        "'belowRate' : 0.1," +
+        "'minReplicas' : 1," +
+        "'belowNodeOp' : 'none'," +
+        "'actions' : [" +
+        "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
+        "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}" +
+        "]" +
+        "}}";
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+    response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    timeSource.sleep(TimeUnit.MILLISECONDS.convert(waitForSeconds + 1, TimeUnit.SECONDS));
+
+    await = finished.await(20, TimeUnit.SECONDS);
+    assertTrue("The trigger did not fire at all", await);
+
+    // suspend trigger
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, suspendTriggerCommand);
+    response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    timeSource.sleep(5000);
+
+    events = listenerEvents.get("srt");
+    assertEquals(events.toString(), 3, events.size());
+
+    ev = events.get(0);
+    assertEquals(ev.toString(), "compute", ev.actionName);
+    ops = (List<TriggerEvent.Op>)ev.event.getProperty(TriggerEvent.REQUESTED_OPS);
+    assertNotNull("there should be some requestedOps: " + ev.toString(), ops);
+    assertEquals(ops.toString(), 2, ops.size());
+    AtomicInteger coldNodes2 = new AtomicInteger();
+    AtomicInteger coldReplicas2 = new AtomicInteger();
+    ops.forEach(op -> {
+      if (op.getAction().equals(CollectionParams.CollectionAction.NONE)) {
+        coldNodes2.incrementAndGet();
+      } else if (op.getAction().equals(CollectionParams.CollectionAction.DELETEREPLICA)) {
+        coldReplicas2.incrementAndGet();
+      } else {
+        fail("unexpected op: " + op);
+      }
+    });
+
+    assertEquals("coldNodes", 1, coldNodes2.get());
+    assertEquals("colReplicas", 1, coldReplicas2.get());
 
+    // now the collection should be at RF == 1, with one additional PULL replica
+    CloudTestUtils.waitForState(cloudManager, COLL1, 20, TimeUnit.SECONDS,
+        CloudTestUtils.clusterShape(1, 2));
   }
 
   public static class CapturingTriggerListener extends TriggerListenerBase {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0546c5fc/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/DeleteReplicaSuggester.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/DeleteReplicaSuggester.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/DeleteReplicaSuggester.java
index a7d5d70..9a942ad 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/DeleteReplicaSuggester.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/DeleteReplicaSuggester.java
@@ -25,8 +25,8 @@ import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.util.Pair;
 
 /**
- * This suggester produces a DELETEREPLICA request using provided {@link Hint#COLL_SHARD} and
- * {@link Hint#NUMBER} hints to specify the collection, shard and number of replicas to delete.
+ * This suggester produces a DELETEREPLICA request using provided {@link org.apache.solr.client.solrj.cloud.autoscaling.Suggester.Hint#COLL_SHARD} and
+ * {@link org.apache.solr.client.solrj.cloud.autoscaling.Suggester.Hint#NUMBER} hints to specify the collection, shard and number of replicas to delete.
  */
 class DeleteReplicaSuggester extends Suggester {
 

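Since the javadoc change above spells out the hint-driven contract, here is a rough sketch of how a DELETEREPLICA suggestion could be requested, assuming the caller already holds a Policy.Session from the autoscaling framework; the helper method and the collection/shard values are invented for the example and are not part of this patch.

  import org.apache.solr.client.solrj.SolrRequest;
  import org.apache.solr.client.solrj.cloud.autoscaling.Policy;
  import org.apache.solr.client.solrj.cloud.autoscaling.Suggester;
  import org.apache.solr.common.params.CollectionParams;
  import org.apache.solr.common.util.Pair;

  class DeleteReplicaSuggestionSketch {
    // Hypothetical helper (not from this patch): request a DELETEREPLICA
    // operation through the two hints described in the javadoc above.
    static SolrRequest suggestDeleteReplicas(Policy.Session session,
                                             String collection, String shard, int count) {
      return session.getSuggester(CollectionParams.CollectionAction.DELETEREPLICA)
          .hint(Suggester.Hint.COLL_SHARD, new Pair<>(collection, shard)) // which shard to shrink
          .hint(Suggester.Hint.NUMBER, count)                             // how many replicas to drop
          .getSuggestion();  // may be null if nothing can be removed safely
    }
  }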
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0546c5fc/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java
index 74a4d1f..2f729d9 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java
@@ -470,6 +470,7 @@ public class Policy implements MapWriter {
     ops.put(CollectionAction.MOVEREPLICA, () -> new MoveReplicaSuggester());
     ops.put(CollectionAction.SPLITSHARD, () -> new SplitShardSuggester());
     ops.put(CollectionAction.MERGESHARDS, () -> new UnsupportedSuggester(CollectionAction.MERGESHARDS));
+    ops.put(CollectionAction.NONE, () -> new UnsupportedSuggester(CollectionAction.NONE));
   }
 
   public Map<String, List<Clause>> getPolicies() {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0546c5fc/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/ReplicaInfo.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/ReplicaInfo.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/ReplicaInfo.java
index e1d8281..50e77f8 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/ReplicaInfo.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/ReplicaInfo.java
@@ -138,6 +138,24 @@ public class ReplicaInfo implements MapWriter {
     return variables.get(name);
   }
 
+  public Object getVariable(String name, Object defValue) {
+    Object o = variables.get(name);
+    if (o != null) {
+      return o;
+    } else {
+      return defValue;
+    }
+  }
+
+  public boolean getBool(String name, boolean defValue) {
+    Object o = getVariable(name, defValue);
+    if (o instanceof Boolean) {
+      return (Boolean)o;
+    } else {
+      return Boolean.parseBoolean(String.valueOf(o));
+    }
+  }
+
   @Override
   public String toString() {
     return Utils.toJSONString(this);

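As a quick illustration of the fallback semantics the two new ReplicaInfo accessors add (the variable names and values below are made up, and 'replica' is assumed to be an existing instance populated elsewhere), a caller can now write:

  // Illustration only, not part of the patch.
  Object type = replica.getVariable("type", "NRT");    // returns "NRT" when "type" is absent
  boolean leader = replica.getBool("leader", false);   // also parses "true"/"false" stored as strings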

[27/46] lucene-solr:jira/solr-11833: LUCENE-8231: Add a new analysis module (nori) similar to Kuromoji but to handle Korean

Posted by ab...@apache.org.
LUCENE-8231: Add a new analysis module (nori) similar to Kuromoji but to handle Korean

This change adds a Korean analyzer in a new analysis module named nori. It is similar
to Kuromoji but uses the mecab-ko-dic dictionary to perform morphological analysis of Korean
text.
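
For readers unfamiliar with the new module, here is a minimal usage sketch (not part of this commit) that drives the new analyzer through the standard Lucene TokenStream API; the field name and the sample sentence are arbitrary choices for illustration.

  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.ko.KoreanAnalyzer;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

  public class NoriDemo {
    public static void main(String[] args) throws Exception {
      // Default configuration: bundled mecab-ko-dic dictionary, default
      // decompound mode and stop tags, no user dictionary.
      try (KoreanAnalyzer analyzer = new KoreanAnalyzer();
           TokenStream ts = analyzer.tokenStream("body", "나는 책을 읽었다")) {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          System.out.println(term.toString()); // one analyzed morpheme per line
        }
        ts.end();
      }
    }
  }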


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e851b89c
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e851b89c
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e851b89c

Branch: refs/heads/jira/solr-11833
Commit: e851b89cbeb1f55edc0f2c1276e2ae812eca2643
Parents: d5b6857
Author: Jim Ferenczi <ji...@apache.org>
Authored: Fri Apr 13 11:26:42 2018 +0200
Committer: Jim Ferenczi <ji...@apache.org>
Committed: Fri Apr 13 11:26:42 2018 +0200

----------------------------------------------------------------------
 dev-tools/idea/.idea/modules.xml                |   1 +
 dev-tools/idea/lucene/analysis/nori/nori.iml    |  22 +
 .../maven/lucene/analysis/nori/pom.xml.template |  75 ++
 .../maven/lucene/analysis/pom.xml.template      |   1 +
 lucene/CHANGES.txt                              |   6 +
 lucene/NOTICE.txt                               |  12 +
 lucene/analysis/README.txt                      |   5 +
 lucene/analysis/build.xml                       |   7 +-
 lucene/analysis/nori/build.xml                  | 135 +++
 lucene/analysis/nori/ivy.xml                    |  33 +
 .../lucene/analysis/ko/DecompoundToken.java     |  69 ++
 .../lucene/analysis/ko/DictionaryToken.java     | 100 ++
 .../lucene/analysis/ko/GraphvizFormatter.java   | 180 ++++
 .../lucene/analysis/ko/KoreanAnalyzer.java      |  78 ++
 .../ko/KoreanPartOfSpeechStopFilter.java        |  85 ++
 .../ko/KoreanPartOfSpeechStopFilterFactory.java |  51 +
 .../analysis/ko/KoreanReadingFormFilter.java    |  51 +
 .../ko/KoreanReadingFormFilterFactory.java      |  42 +
 .../lucene/analysis/ko/KoreanTokenizer.java     | 957 +++++++++++++++++++
 .../analysis/ko/KoreanTokenizerFactory.java     |  89 ++
 .../java/org/apache/lucene/analysis/ko/POS.java | 304 ++++++
 .../org/apache/lucene/analysis/ko/Token.java    | 125 +++
 .../analysis/ko/dict/BinaryDictionary.java      | 239 +++++
 .../analysis/ko/dict/CharacterDefinition.java   | 136 +++
 .../analysis/ko/dict/ConnectionCosts.java       |  96 ++
 .../lucene/analysis/ko/dict/Dictionary.java     |  83 ++
 .../analysis/ko/dict/TokenInfoDictionary.java   |  77 ++
 .../lucene/analysis/ko/dict/TokenInfoFST.java   |  85 ++
 .../analysis/ko/dict/UnknownDictionary.java     |  61 ++
 .../lucene/analysis/ko/dict/UserDictionary.java | 235 +++++
 .../lucene/analysis/ko/dict/package-info.java   |  21 +
 .../apache/lucene/analysis/ko/package-info.java |  21 +
 .../tokenattributes/PartOfSpeechAttribute.java  |  54 ++
 .../PartOfSpeechAttributeImpl.java              |  92 ++
 .../ko/tokenattributes/ReadingAttribute.java    |  38 +
 .../tokenattributes/ReadingAttributeImpl.java   |  55 ++
 .../ko/tokenattributes/package-info.java        |  21 +
 .../apache/lucene/analysis/ko/util/CSVUtil.java |  95 ++
 .../lucene/analysis/ko/util/package-info.java   |  21 +
 lucene/analysis/nori/src/java/overview.html     |  34 +
 ...ache.lucene.analysis.util.TokenFilterFactory |  16 +
 ...apache.lucene.analysis.util.TokenizerFactory |  16 +
 .../analysis/ko/dict/CharacterDefinition.dat    | Bin 0 -> 65564 bytes
 .../lucene/analysis/ko/dict/ConnectionCosts.dat | Bin 0 -> 11178837 bytes
 .../ko/dict/TokenInfoDictionary$buffer.dat      | Bin 0 -> 7245625 bytes
 .../ko/dict/TokenInfoDictionary$fst.dat         | Bin 0 -> 5640925 bytes
 .../ko/dict/TokenInfoDictionary$posDict.dat     | Bin 0 -> 2712 bytes
 .../ko/dict/TokenInfoDictionary$targetMap.dat   | Bin 0 -> 811783 bytes
 .../ko/dict/UnknownDictionary$buffer.dat        | Bin 0 -> 101 bytes
 .../ko/dict/UnknownDictionary$posDict.dat       | Bin 0 -> 1823 bytes
 .../ko/dict/UnknownDictionary$targetMap.dat     | Bin 0 -> 36 bytes
 .../analysis/ko/StringMockResourceLoader.java   |  58 ++
 .../lucene/analysis/ko/TestKoreanAnalyzer.java  | 109 +++
 ...TestKoreanPartOfSpeechStopFilterFactory.java |  59 ++
 .../ko/TestKoreanReadingFormFilter.java         |  75 ++
 .../ko/TestKoreanReadingFormFilterFactory.java  |  51 +
 .../lucene/analysis/ko/TestKoreanTokenizer.java | 355 +++++++
 .../analysis/ko/TestKoreanTokenizerFactory.java | 113 +++
 .../ko/dict/TestTokenInfoDictionary.java        | 113 +++
 .../analysis/ko/dict/UserDictionaryTest.java    |  62 ++
 .../org/apache/lucene/analysis/ko/userdict.txt  |   5 +
 .../ko/util/BinaryDictionaryWriter.java         | 282 ++++++
 .../ko/util/CharacterDefinitionWriter.java      |  94 ++
 .../ko/util/ConnectionCostsBuilder.java         |  67 ++
 .../analysis/ko/util/ConnectionCostsWriter.java |  75 ++
 .../analysis/ko/util/DictionaryBuilder.java     |  67 ++
 .../ko/util/TokenInfoDictionaryBuilder.java     | 150 +++
 .../ko/util/TokenInfoDictionaryWriter.java      |  49 +
 .../ko/util/UnknownDictionaryBuilder.java       | 134 +++
 .../ko/util/UnknownDictionaryWriter.java        |  65 ++
 .../analysis/ko/dict/UnknownDictionaryTest.java |  74 ++
 lucene/ivy-versions.properties                  |   1 +
 lucene/module-build.xml                         |  22 +
 73 files changed, 6003 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/dev-tools/idea/.idea/modules.xml
----------------------------------------------------------------------
diff --git a/dev-tools/idea/.idea/modules.xml b/dev-tools/idea/.idea/modules.xml
index 207a675..f77c34a 100644
--- a/dev-tools/idea/.idea/modules.xml
+++ b/dev-tools/idea/.idea/modules.xml
@@ -13,6 +13,7 @@
 
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/common/analysis-common.iml" />
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/icu/icu.iml" />
+      <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/nori/nori.iml" />
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/kuromoji/kuromoji.iml" />
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/morfologik/morfologik.iml" />
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/opennlp/opennlp.iml" />

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/dev-tools/idea/lucene/analysis/nori/nori.iml
----------------------------------------------------------------------
diff --git a/dev-tools/idea/lucene/analysis/nori/nori.iml b/dev-tools/idea/lucene/analysis/nori/nori.iml
new file mode 100644
index 0000000..aa2d18e
--- /dev/null
+++ b/dev-tools/idea/lucene/analysis/nori/nori.iml
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="JAVA_MODULE" version="4">
+  <component name="NewModuleRootManager" inherit-compiler-output="false">
+    <output url="file://$MODULE_DIR$/../../../idea-build/lucene/analysis/godori/classes/java" />
+    <output-test url="file://$MODULE_DIR$/../../../idea-build/lucene/analysis/godori/classes/test" />
+    <exclude-output />
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/resources" type="java-resource" />
+      <sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
+      <sourceFolder url="file://$MODULE_DIR$/src/tools/java" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/tools/test" isTestSource="true" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="library" name="ICU library" level="project" />
+    <orderEntry type="library" scope="TEST" name="JUnit" level="project" />
+    <orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
+    <orderEntry type="module" module-name="analysis-common" />
+    <orderEntry type="module" module-name="lucene-core" />
+  </component>
+</module>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/dev-tools/maven/lucene/analysis/nori/pom.xml.template
----------------------------------------------------------------------
diff --git a/dev-tools/maven/lucene/analysis/nori/pom.xml.template b/dev-tools/maven/lucene/analysis/nori/pom.xml.template
new file mode 100644
index 0000000..ac37a08
--- /dev/null
+++ b/dev-tools/maven/lucene/analysis/nori/pom.xml.template
@@ -0,0 +1,75 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.lucene</groupId>
+    <artifactId>lucene-parent</artifactId>
+    <version>@version@</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+  <groupId>org.apache.lucene</groupId>
+  <artifactId>lucene-analyzers-nori</artifactId>
+  <packaging>jar</packaging>
+  <name>Lucene Nori Korean Morphological Analyzer</name>
+  <description>
+    Lucene Nori Korean Morphological Analyzer
+  </description>
+  <properties>
+    <module-directory>lucene/analysis/nori</module-directory>
+    <relative-top-level>../../../..</relative-top-level>
+    <module-path>${relative-top-level}/${module-directory}</module-path>
+  </properties>
+  <scm>
+    <connection>scm:git:${vc-anonymous-base-url}</connection>
+    <developerConnection>scm:git:${vc-dev-base-url}</developerConnection>
+    <url>${vc-browse-base-url};f=${module-directory}</url>
+  </scm>
+  <dependencies>
+    <dependency> 
+      <!-- lucene-test-framework dependency must be declared before lucene-core -->
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-test-framework</artifactId>
+      <scope>test</scope>
+    </dependency>
+@lucene-analyzers-nori.internal.dependencies@
+@lucene-analyzers-nori.external.dependencies@
+@lucene-analyzers-nori.internal.test.dependencies@
+@lucene-analyzers-nori.external.test.dependencies@
+  </dependencies>
+  <build>
+    <sourceDirectory>${module-path}/src/java</sourceDirectory>
+    <testSourceDirectory>${module-path}/src/test</testSourceDirectory>
+    <resources>
+      <resource>
+        <directory>${module-path}/src/resources</directory>
+      </resource>
+    </resources>
+    <testResources>
+      <testResource>
+        <directory>${project.build.testSourceDirectory}</directory>
+        <excludes>
+          <exclude>**/*.java</exclude>
+        </excludes>
+      </testResource>
+    </testResources>
+  </build>
+</project>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/dev-tools/maven/lucene/analysis/pom.xml.template
----------------------------------------------------------------------
diff --git a/dev-tools/maven/lucene/analysis/pom.xml.template b/dev-tools/maven/lucene/analysis/pom.xml.template
index 466ad30..dada0d5 100644
--- a/dev-tools/maven/lucene/analysis/pom.xml.template
+++ b/dev-tools/maven/lucene/analysis/pom.xml.template
@@ -35,6 +35,7 @@
     <module>icu</module>
     <module>kuromoji</module>
     <module>morfologik</module>
+    <module>nori</module>
     <module>opennlp</module>
     <module>phonetic</module>
     <module>smartcn</module>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 9016a50..a0e339e 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -41,6 +41,12 @@ API Changes
 * LUCENE-8242: Deprecated method IndexSearcher#createNormalizedWeight() has
   been removed (Alan Woodward)
 
+New Features
+
+* LUCENE-8231: A new analysis module (nori) similar to Kuromoji
+  but for Korean, using the mecab-ko-dic dictionary for morphological analysis.
+  (Robert Muir, Jim Ferenczi)
+
 Changes in Runtime Behavior
 
 * LUCENE-7837: Indices that were created before the previous major version

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/NOTICE.txt
----------------------------------------------------------------------
diff --git a/lucene/NOTICE.txt b/lucene/NOTICE.txt
index e25c211..4970d20 100644
--- a/lucene/NOTICE.txt
+++ b/lucene/NOTICE.txt
@@ -190,3 +190,15 @@ grants independently of ICOT any specific warranty to the user in
 writing, such person, organization or entity, will also be exempted
 from and not be held liable to the user for any such damages as noted
 above as far as the program is concerned.
+
+===========================================================================
+Nori Korean Morphological Analyzer - Apache Lucene Integration
+===========================================================================
+
+This software includes a binary and/or source version of data from
+
+  mecab-ko-dic-2.0.3-20170922
+
+which can be obtained from
+
+  https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.0.3-20170922.tar.gz

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/README.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/README.txt b/lucene/analysis/README.txt
index c68584e..8a9c8d9 100644
--- a/lucene/analysis/README.txt
+++ b/lucene/analysis/README.txt
@@ -28,6 +28,9 @@ lucene-analyzers-kuromoji-XX.jar
 lucene-analyzers-morfologik-XX.jar
   An analyzer using the Morfologik stemming library.
 
+lucene-analyzers-nori-XX.jar
+  An analyzer with morphological analysis for Korean.
+
 lucene-analyzers-opennlp-XX.jar
   An analyzer using the OpenNLP natural-language processing library.
 
@@ -52,6 +55,7 @@ common/src/java
 icu/src/java
 kuromoji/src/java
 morfologik/src/java
+nori/src/java
 opennlp/src/java
 phonetic/src/java
 smartcn/src/java
@@ -63,6 +67,7 @@ common/src/test
 icu/src/test
 kuromoji/src/test
 morfologik/src/test
+nori/src/test
 opennlp/src/test
 phonetic/src/test
 smartcn/src/test

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/build.xml
----------------------------------------------------------------------
diff --git a/lucene/analysis/build.xml b/lucene/analysis/build.xml
index ed1566c..95dd7dc 100644
--- a/lucene/analysis/build.xml
+++ b/lucene/analysis/build.xml
@@ -25,6 +25,7 @@
       - icu: Analyzers that use functionality from ICU
       - kuromoji: Japanese Morphological Analyzer
       - morfologik: Morfologik Stemmer
+      - nori: Korean Morphological Analyzer
       - smartcn: Smart Analyzer for Simplified Chinese Text
       - stempel: Algorithmic Stemmer for Polish
       - uima: UIMA Analysis module
@@ -65,6 +66,10 @@
     <ant dir="morfologik" />
   </target>
 
+  <target name="nori">
+    <ant dir="nori" />
+  </target>
+
   <target name="opennlp">
     <ant dir="opennlp" />
   </target>
@@ -86,7 +91,7 @@
   </target>
 
   <target name="default" depends="compile"/>
-  <target name="compile" depends="common,icu,kuromoji,morfologik,opennlp,phonetic,smartcn,stempel,uima" />
+  <target name="compile" depends="common,icu,kuromoji,morfologik,nori,opennlp,phonetic,smartcn,stempel,uima" />
 
   <target name="clean">
     <forall-analyzers target="clean"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/build.xml
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/build.xml b/lucene/analysis/nori/build.xml
new file mode 100644
index 0000000..dacf3a9
--- /dev/null
+++ b/lucene/analysis/nori/build.xml
@@ -0,0 +1,135 @@
+<?xml version="1.0"?>
+
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ -->
+
+<project name="analyzers-nori" default="default" xmlns:ivy="antlib:org.apache.ivy.ant">
+
+  <description>
+    Korean Morphological Analyzer
+  </description>
+
+  <!-- currently whether rat detects this as binary or not
+       is platform dependent?! -->
+  <property name="rat.excludes" value="**/*.txt,**/bocchan.utf-8"/>
+  <property name="rat.additional-includes" value="src/tools/**"/>
+
+  <!-- we don't want to pull in ipadic/naist etc -->
+  <property name="ivy.default.configuration" value="default"/>
+  <import file="../analysis-module-build.xml"/>
+
+  <!-- default configuration for Korean: uses mecab-ko-dic -->
+  <property name="dict.type" value="mecab-ko-dic"/>
+  <property name="dict.version" value="mecab-ko-dic-2.0.3-20170922" />
+
+  <property name="dict.src.file" value="${dict.version}.tar.gz" />
+  <property name="dict.src.dir" value="${build.dir}/${dict.version}" />
+  <property name="dict.encoding" value="utf-8"/>
+  <property name="dict.normalize" value="false"/>
+  <property name="dict.target.dir" location="${resources.dir}"/>
+
+  <available type="dir" file="${build.dir}/${dict.version}" property="mecab-ko.dict.available"/>
+
+  <path id="classpath">
+    <pathelement path="${analyzers-common.jar}"/>
+    <path refid="base.classpath"/>
+  </path>
+
+  <target name="compile-core" depends="jar-analyzers-common, common.compile-core" />
+  <target name="download-dict" depends="ivy-availability-check,ivy-fail,ivy-configure" unless="mecab-ko.dict.available">
+    <ivy:retrieve pattern="${build.dir}/${dict.src.file}" conf="${dict.type}" symlink="${ivy.symlink}"/>
+    <!-- TODO: we should checksum too -->
+    <gunzip src="${build.dir}/${dict.src.file}"/>
+    <untar src="${build.dir}/${dict.version}.tar" dest="${build.dir}"/>
+  </target>
+
+  <path id="tools.dependencies">
+    <fileset dir="../icu/lib"/>
+  </path>
+
+  <path id="tools.classpath">
+    <path refid="classpath"/>
+    <path refid="tools.dependencies"/>
+    <pathelement location="${build.dir}/classes/java"/>
+    <pathelement location="${build.dir}/classes/tools"/>
+  </path>
+
+  <path id="tools.test.classpath">
+    <path refid="tools.classpath"/>
+    <path refid="test.base.classpath"/>
+    <pathelement location="${build.dir}/classes/tools-test"/>
+  </path>
+
+  <target name="build-dict" depends="compile-tools, download-dict">
+    <sequential>
+      <delete verbose="true">
+        <fileset dir="${resources.dir}/org/apache/lucene/analysis/ko/dict" includes="**/*"/>
+      </delete>
+      <!-- TODO: optimize the dictionary construction a bit so that you don't need 1G -->
+      <java fork="true" failonerror="true" maxmemory="1g" classname="org.apache.lucene.analysis.ko.util.DictionaryBuilder">
+        <classpath>
+          <path refid="tools.classpath"/>
+        </classpath>
+        <assertions>
+          <enable package="org.apache.lucene"/>
+        </assertions>
+        <arg value="${dict.src.dir}"/>
+        <arg value="${dict.target.dir}"/>
+        <arg value="${dict.encoding}"/>
+        <arg value="${dict.normalize}"/>
+      </java>
+    </sequential>
+  </target>
+
+  <!-- we don't actually need to compile this thing, we just want its lib -->
+  <target name="resolve-icu">
+    <ant dir="../icu/" target="resolve" inheritAll="false">
+      <propertyset refid="uptodate.and.compiled.properties"/>
+    </ant>
+  </target>
+
+  <target name="compile-tools" depends="resolve-icu, compile-core, common.compile-tools">
+    <compile
+        srcdir="src/tools/java"
+        destdir="${build.dir}/classes/tools">
+      <classpath>
+        <path refid="tools.classpath"/>
+      </classpath>
+    </compile>
+  </target>
+
+  <target name="compile-tools-tests" depends="compile-tools">
+    <compile
+        srcdir="src/tools/test"
+        destdir="${build.dir}/classes/tools-test">
+      <classpath>
+        <path refid="tools.test.classpath"/>
+        <pathelement path="src/tools/test"/>
+      </classpath>
+    </compile>
+  </target>
+
+  <target name="test-tools" depends="compile-tools-tests">
+    <test-macro dataDir="src/tools/test" junit.classpath="tools.test.classpath"/>
+  </target>
+
+  <target name="compile-test" depends="module-build.compile-test, compile-tools-tests"/>
+  <!-- TODO: not until we properly make 'test-tools' work with clover etc
+  <target name="test" depends="module-build.test, test-tools"/> -->
+
+  <target name="regenerate" depends="build-dict"/>
+</project>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/ivy.xml
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/ivy.xml b/lucene/analysis/nori/ivy.xml
new file mode 100644
index 0000000..8d32937
--- /dev/null
+++ b/lucene/analysis/nori/ivy.xml
@@ -0,0 +1,33 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<ivy-module version="2.0">
+  <info organisation="org.apache.lucene" module="analyzers-nori"/>
+
+  <configurations defaultconfmapping="mecab-ko-dic->default"> <!-- 'master' conf not available to map to -->
+    <conf name="default" description="explicitly declare this configuration in order to not download dictionaries unless explicitly called for"/>
+    <conf name="mecab-ko-dic" description="mecab-ko dictionary for Korean" transitive="false"/>
+  </configurations>
+
+  <dependencies>
+    <dependency org="mecab" name="mecab-ko-dic" rev="${/mecab/mecab-ko-dic}" conf="mecab-ko-dic">
+      <artifact name="mecab-ko-dic" type=".tar.gz" url="https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.0.3-20170922.tar.gz" />
+    </dependency>
+    <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
+  </dependencies>
+</ivy-module>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DecompoundToken.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DecompoundToken.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DecompoundToken.java
new file mode 100644
index 0000000..a44a2d8
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DecompoundToken.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+import org.apache.lucene.analysis.ko.dict.Dictionary;
+
+/**
+ * A token that was generated from a compound.
+ */
+public class DecompoundToken extends Token {
+  private final POS.Tag posTag;
+
+  /**
+   *  Creates a new DecompoundToken
+   * @param posTag The part of speech of the token.
+   * @param surfaceForm The surface form of the token.
+   * @param startOffset The start offset of the token in the analyzed text.
+   * @param endOffset The end offset of the token in the analyzed text.
+   */
+  public DecompoundToken(POS.Tag posTag, String surfaceForm, int startOffset, int endOffset) {
+    super(surfaceForm.toCharArray(), 0, surfaceForm.length(), startOffset, endOffset);
+    this.posTag = posTag;
+  }
+
+  @Override
+  public String toString() {
+    return "DecompoundToken(\"" + getSurfaceFormString() + "\" pos=" + getStartOffset() + " length=" + getLength() +
+        " startOffset=" + getStartOffset() + " endOffset=" + getEndOffset() + ")";
+  }
+
+  @Override
+  public POS.Type getPOSType() {
+    return POS.Type.MORPHEME;
+  }
+
+  @Override
+  public POS.Tag getLeftPOS() {
+    return posTag;
+  }
+
+  @Override
+  public POS.Tag getRightPOS() {
+    return posTag;
+  }
+
+  @Override
+  public String getReading() {
+    return null;
+  }
+
+  @Override
+  public Dictionary.Morpheme[] getMorphemes() {
+    return null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DictionaryToken.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DictionaryToken.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DictionaryToken.java
new file mode 100644
index 0000000..3efb119
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DictionaryToken.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+import org.apache.lucene.analysis.ko.dict.Dictionary;
+
+/**
+ * A token stored in a {@link Dictionary}.
+ */
+public class DictionaryToken extends Token {
+  private final int wordId;
+  private final KoreanTokenizer.Type type;
+  private final Dictionary dictionary;
+
+  public DictionaryToken(KoreanTokenizer.Type type, Dictionary dictionary, int wordId, char[] surfaceForm,
+                         int offset, int length, int startOffset, int endOffset) {
+    super(surfaceForm, offset, length, startOffset, endOffset);
+    this.type = type;
+    this.dictionary = dictionary;
+    this.wordId = wordId;
+  }
+
+  @Override
+  public String toString() {
+    return "DictionaryToken(\"" + getSurfaceFormString() + "\" pos=" + getStartOffset() + " length=" + getLength() +
+        " posLen=" + getPositionLength() + " type=" + type + " wordId=" + wordId +
+        " leftID=" + dictionary.getLeftId(wordId) + ")";
+  }
+
+  /**
+   * Returns the type of this token
+   * @return token type, not null
+   */
+  public KoreanTokenizer.Type getType() {
+    return type;
+  }
+
+  /**
+   * Returns true if this token is a known word
+   * @return true if this token is in the standard dictionary; false if not.
+   */
+  public boolean isKnown() {
+    return type == KoreanTokenizer.Type.KNOWN;
+  }
+
+  /**
+   * Returns true if this token is an unknown word
+   * @return true if this token is an unknown word; false if not.
+   */
+  public boolean isUnknown() {
+    return type == KoreanTokenizer.Type.UNKNOWN;
+  }
+
+  /**
+   * Returns true if this token is defined in the user dictionary
+   * @return true if this token is in the user dictionary; false if not.
+   */
+  public boolean isUser() {
+    return type == KoreanTokenizer.Type.USER;
+  }
+
+  @Override
+  public POS.Type getPOSType() {
+    return dictionary.getPOSType(wordId);
+  }
+
+  @Override
+  public POS.Tag getLeftPOS() {
+    return dictionary.getLeftPOS(wordId);
+  }
+
+  @Override
+  public POS.Tag getRightPOS() {
+    return dictionary.getRightPOS(wordId);
+  }
+
+  @Override
+  public String getReading() {
+    return dictionary.getReading(wordId);
+  }
+
+  @Override
+  public Dictionary.Morpheme[] getMorphemes() {
+    return dictionary.getMorphemes(wordId, getSurfaceForm(), getOffset(), getLength());
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/GraphvizFormatter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/GraphvizFormatter.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/GraphvizFormatter.java
new file mode 100644
index 0000000..9feb354
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/GraphvizFormatter.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.ko.KoreanTokenizer.Position;
+import org.apache.lucene.analysis.ko.KoreanTokenizer.WrappedPositionArray;
+import org.apache.lucene.analysis.ko.dict.ConnectionCosts;
+import org.apache.lucene.analysis.ko.dict.Dictionary;
+
+
+// TODO: would be nice to show 2nd best path in a diff't
+// color...
+
+/**
+ * Outputs the dot (graphviz) string for the viterbi lattice.
+ */
+public class GraphvizFormatter {
+  
+  private final static String BOS_LABEL = "BOS";
+  
+  private final static String EOS_LABEL = "EOS";
+  
+  private final static String FONT_NAME = "Helvetica";
+  
+  private final ConnectionCosts costs;
+  
+  private final Map<String, String> bestPathMap;
+  
+  private final StringBuilder sb = new StringBuilder();
+  
+  public GraphvizFormatter(ConnectionCosts costs) {
+    this.costs = costs;
+    this.bestPathMap = new HashMap<>();
+    sb.append(formatHeader());
+    sb.append("  init [style=invis]\n");
+    sb.append("  init -> 0.0 [label=\"" + BOS_LABEL + "\"]\n");
+  }
+
+  public String finish() {
+    sb.append(formatTrailer());
+    return sb.toString();
+  }
+
+  // Backtraces another incremental fragment:
+  void onBacktrace(KoreanTokenizer tok, WrappedPositionArray positions, int lastBackTracePos, Position endPosData, int fromIDX, char[] fragment, boolean isEnd) {
+    setBestPathMap(positions, lastBackTracePos, endPosData, fromIDX);
+    sb.append(formatNodes(tok, positions, lastBackTracePos, endPosData, fragment));
+    if (isEnd) {
+      sb.append("  fini [style=invis]\n");
+      sb.append("  ");
+      sb.append(getNodeID(endPosData.pos, fromIDX));
+      sb.append(" -> fini [label=\"" + EOS_LABEL + "\"]");
+    }
+  }
+
+  // Records which arcs make up the best path:
+  private void setBestPathMap(WrappedPositionArray positions, int startPos, Position endPosData, int fromIDX) {
+    bestPathMap.clear();
+
+    int pos = endPosData.pos;
+    int bestIDX = fromIDX;
+    while (pos > startPos) {
+      final Position posData = positions.get(pos);
+
+      final int backPos = posData.backPos[bestIDX];
+      final int backIDX = posData.backIndex[bestIDX];
+
+      final String toNodeID = getNodeID(pos, bestIDX);
+      final String fromNodeID = getNodeID(backPos, backIDX);
+      
+      assert !bestPathMap.containsKey(fromNodeID);
+      assert !bestPathMap.containsValue(toNodeID);
+      bestPathMap.put(fromNodeID, toNodeID);
+      pos = backPos;
+      bestIDX = backIDX;
+    }
+  }
+  
+  private String formatNodes(KoreanTokenizer tok, WrappedPositionArray positions, int startPos, Position endPosData, char[] fragment) {
+
+    StringBuilder sb = new StringBuilder();
+    // Output nodes
+    for (int pos = startPos+1; pos <= endPosData.pos; pos++) {
+      final Position posData = positions.get(pos);
+      for(int idx=0;idx<posData.count;idx++) {
+        sb.append("  ");
+        sb.append(getNodeID(pos, idx));
+        sb.append(" [label=\"");
+        sb.append(pos);
+        sb.append(": ");
+        sb.append(posData.lastRightID[idx]);
+        sb.append("\"]\n");
+      }
+    }
+
+    // Output arcs
+    for (int pos = endPosData.pos; pos > startPos; pos--) {
+      final Position posData = positions.get(pos);
+      for(int idx=0;idx<posData.count;idx++) {
+        final Position backPosData = positions.get(posData.backPos[idx]);
+        final String toNodeID = getNodeID(pos, idx);
+        final String fromNodeID = getNodeID(posData.backPos[idx], posData.backIndex[idx]);
+
+        sb.append("  ");
+        sb.append(fromNodeID);
+        sb.append(" -> ");
+        sb.append(toNodeID);
+
+        final String attrs;
+        if (toNodeID.equals(bestPathMap.get(fromNodeID))) {
+          // This arc is on best path
+          attrs = " color=\"#40e050\" fontcolor=\"#40a050\" penwidth=3 fontsize=20";
+        } else {
+          attrs = "";
+        }
+
+        final Dictionary dict = tok.getDict(posData.backType[idx]);
+        final int wordCost = dict.getWordCost(posData.backID[idx]);
+        final int bgCost = costs.get(backPosData.lastRightID[posData.backIndex[idx]],
+                                     dict.getLeftId(posData.backID[idx]));
+
+        final String surfaceForm = new String(fragment,
+                                              posData.backPos[idx] - startPos,
+                                              pos - posData.backPos[idx]);
+        
+        sb.append(" [label=\"");
+        sb.append(surfaceForm);
+        sb.append(' ');
+        sb.append(wordCost);
+        if (bgCost >= 0) {
+          sb.append('+');
+        }
+        sb.append(bgCost);
+        sb.append("\"");
+        sb.append(attrs);
+        sb.append("]\n");
+      }
+    }
+    return sb.toString();
+  }
+  
+  private String formatHeader() {
+    StringBuilder sb = new StringBuilder();
+    sb.append("digraph viterbi {\n");
+    sb.append("  graph [ fontsize=30 labelloc=\"t\" label=\"\" splines=true overlap=false rankdir = \"LR\"];\n");
+    //sb.append("  // A2 paper size\n");
+    //sb.append("  size = \"34.4,16.5\";\n");
+    //sb.append("  // try to fill paper\n");
+    //sb.append("  ratio = fill;\n");
+    sb.append("  edge [ fontname=\"" + FONT_NAME + "\" fontcolor=\"red\" color=\"#606060\" ]\n");
+    sb.append("  node [ style=\"filled\" fillcolor=\"#e8e8f0\" shape=\"Mrecord\" fontname=\"" + FONT_NAME + "\" ]\n");
+    
+    return sb.toString();
+  }
+  
+  private String formatTrailer() {
+    return "}";
+  }
+  
+  private String getNodeID(int pos, int idx) {
+    return pos + "." + idx;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java
new file mode 100644
index 0000000..76023bb
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanAnalyzer.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.ko.KoreanTokenizer.DecompoundMode;
+import org.apache.lucene.analysis.ko.dict.UserDictionary;
+
+import static org.apache.lucene.analysis.TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY;
+
+/**
+ * Analyzer for Korean that uses morphological analysis.
+ * @see KoreanTokenizer
+ * @lucene.experimental
+ */
+public class KoreanAnalyzer extends Analyzer {
+  private final UserDictionary userDict;
+  private final KoreanTokenizer.DecompoundMode mode;
+  private final Set<POS.Tag> stopTags;
+  private final boolean outputUnknownUnigrams;
+
+  /**
+   * Creates a new KoreanAnalyzer.
+   */
+  public KoreanAnalyzer() {
+    this(null, KoreanTokenizer.DEFAULT_DECOMPOUND, KoreanPartOfSpeechStopFilter.DEFAULT_STOP_TAGS, false);
+  }
+
+  /**
+   * Creates a new KoreanAnalyzer.
+   *
+   * @param userDict Optional: if non-null, user dictionary.
+   * @param mode Decompound mode.
+   * @param stopTags The set of part-of-speech tags that should be filtered.
+   * @param outputUnknownUnigrams If true, outputs unigrams for unknown words.
+   */
+  public KoreanAnalyzer(UserDictionary userDict, DecompoundMode mode, Set<POS.Tag> stopTags, boolean outputUnknownUnigrams) {
+    super();
+    this.userDict = userDict;
+    this.mode = mode;
+    this.stopTags = stopTags;
+    this.outputUnknownUnigrams = outputUnknownUnigrams;
+  }
+  
+  @Override
+  protected TokenStreamComponents createComponents(String fieldName) {
+    Tokenizer tokenizer = new KoreanTokenizer(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, userDict, mode, outputUnknownUnigrams);
+    TokenStream stream = new KoreanPartOfSpeechStopFilter(tokenizer, stopTags);
+    stream = new KoreanReadingFormFilter(stream);
+    stream = new LowerCaseFilter(stream);
+    return new TokenStreamComponents(tokenizer, stream);
+  }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new LowerCaseFilter(in);
+    return result;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilter.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilter.java
new file mode 100644
index 0000000..4fa7524
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilter.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+
+import java.util.Arrays;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.lucene.analysis.FilteringTokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ko.tokenattributes.PartOfSpeechAttribute;
+
+/**
+ * Removes tokens that match a set of part-of-speech tags.
+ * @lucene.experimental
+ */
+public final class KoreanPartOfSpeechStopFilter extends FilteringTokenFilter {
+  private final Set<POS.Tag> stopTags;
+  private final PartOfSpeechAttribute posAtt = addAttribute(PartOfSpeechAttribute.class);
+
+  /**
+   * Default list of tags to filter.
+   */
+  public static final Set<POS.Tag> DEFAULT_STOP_TAGS = Arrays.asList(
+      POS.Tag.E,
+      POS.Tag.IC,
+      POS.Tag.J,
+      POS.Tag.MAG,
+      POS.Tag.MAJ,
+      POS.Tag.MM,
+      POS.Tag.SP,
+      POS.Tag.SSC,
+      POS.Tag.SSO,
+      POS.Tag.SC,
+      POS.Tag.SE,
+      POS.Tag.XPN,
+      POS.Tag.XSA,
+      POS.Tag.XSN,
+      POS.Tag.XSV,
+      POS.Tag.UNA,
+      POS.Tag.NA,
+      POS.Tag.VSV
+  ).stream().collect(Collectors.toSet());
+
+  /**
+   * Create a new {@link KoreanPartOfSpeechStopFilter} with the default
+   * list of stop tags {@link #DEFAULT_STOP_TAGS}.
+   *
+   * @param input    the {@link TokenStream} to consume
+   */
+  public KoreanPartOfSpeechStopFilter(TokenStream input) {
+    this(input, DEFAULT_STOP_TAGS);
+  }
+
+  /**
+   * Create a new {@link KoreanPartOfSpeechStopFilter}.
+   * @param input    the {@link TokenStream} to consume
+   * @param stopTags the part-of-speech tags that should be removed
+   */
+  public KoreanPartOfSpeechStopFilter(TokenStream input, Set<POS.Tag> stopTags) {
+    super(input);
+    this.stopTags = stopTags;
+  }
+
+  @Override
+  protected boolean accept() {
+    final POS.Tag leftPOS = posAtt.getLeftPOS();
+    return leftPOS == null || !stopTags.contains(leftPOS);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java
new file mode 100644
index 0000000..010abc8
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link KoreanPartOfSpeechStopFilter}.
+ * @lucene.experimental
+ */
+public class KoreanPartOfSpeechStopFilterFactory extends TokenFilterFactory {
+  private Set<POS.Tag> stopTags;
+
+  /** Creates a new KoreanPartOfSpeechStopFilterFactory */
+  public KoreanPartOfSpeechStopFilterFactory(Map<String,String> args) {
+    super(args);
+    Set<String> stopTagStr = getSet(args, "tags");
+    if (stopTagStr == null) {
+      stopTags = KoreanPartOfSpeechStopFilter.DEFAULT_STOP_TAGS;
+    } else {
+      stopTags = stopTagStr.stream().map(POS::resolveTag).collect(Collectors.toSet());
+    }
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+  
+  @Override
+  public TokenStream create(TokenStream stream) {
+    return new KoreanPartOfSpeechStopFilter(stream, stopTags);
+  }
+}
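
Likewise, a hedged sketch of driving the factory programmatically; the "tags" value mirrors the factory test added later in this commit, and the wrapper class is illustrative:

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.ko.KoreanPartOfSpeechStopFilterFactory;
    import org.apache.lucene.util.Version;

    public class PosStopFactorySketch {
      public static TokenStream filter(TokenStream in) {
        Map<String, String> args = new HashMap<>();
        args.put("luceneMatchVersion", Version.LATEST.toString());
        // Comma-separated part-of-speech tags to remove; omit "tags" to use DEFAULT_STOP_TAGS.
        args.put("tags", "E, J");
        KoreanPartOfSpeechStopFilterFactory factory = new KoreanPartOfSpeechStopFilterFactory(args);
        return factory.create(in);
      }
    }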

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilter.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilter.java
new file mode 100644
index 0000000..8b7e6cb
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilter.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ko.tokenattributes.ReadingAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+/**
+ * Replaces term text with the {@link ReadingAttribute}, which is
+ * the Hangul transcription of Hanja characters.
+ * @lucene.experimental
+ */
+public final class KoreanReadingFormFilter extends TokenFilter {
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final ReadingAttribute readingAtt = addAttribute(ReadingAttribute.class);
+
+  public KoreanReadingFormFilter(TokenStream input) {
+    super(input);
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      String reading = readingAtt.getReading();
+      if (reading != null) {
+        termAtt.setEmpty().append(reading);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+}
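
A short, hedged sketch of the typical chain for this filter, mirroring the analyzer built in the filter's test added later in this commit; the class name is illustrative:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.ko.KoreanReadingFormFilter;
    import org.apache.lucene.analysis.ko.KoreanTokenizer;

    public class ReadingFormAnalyzerSketch extends Analyzer {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        // Tokenize Korean text, then replace Hanja surface forms with their Hangul readings.
        Tokenizer tokenizer = new KoreanTokenizer();
        TokenStream stream = new KoreanReadingFormFilter(tokenizer);
        return new TokenStreamComponents(tokenizer, stream);
      }
    }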

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java
new file mode 100644
index 0000000..860a139
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link KoreanReadingFormFilter}.
+ * @lucene.experimental
+ */
+public class KoreanReadingFormFilterFactory extends TokenFilterFactory {
+
+  /** Creates a new KoreanReadingFormFilterFactory */
+  public KoreanReadingFormFilterFactory(Map<String,String> args) {
+    super(args);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+  
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new KoreanReadingFormFilter(input);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java
new file mode 100644
index 0000000..822853b
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java
@@ -0,0 +1,957 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.EnumMap;
+import java.util.List;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.ko.dict.CharacterDefinition;
+import org.apache.lucene.analysis.ko.dict.ConnectionCosts;
+import org.apache.lucene.analysis.ko.dict.Dictionary;
+import org.apache.lucene.analysis.ko.dict.TokenInfoDictionary;
+import org.apache.lucene.analysis.ko.dict.TokenInfoFST;
+import org.apache.lucene.analysis.ko.dict.UnknownDictionary;
+import org.apache.lucene.analysis.ko.dict.UserDictionary;
+import org.apache.lucene.analysis.ko.tokenattributes.PartOfSpeechAttribute;
+import org.apache.lucene.analysis.ko.tokenattributes.ReadingAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.analysis.util.RollingCharBuffer;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.fst.FST;
+
+/**
+ * Tokenizer for Korean that uses morphological analysis.
+ * <p>
+ * This tokenizer sets a number of additional attributes:
+ * <ul>
+ *   <li>{@link PartOfSpeechAttribute} containing part-of-speech.
+ *   <li>{@link ReadingAttribute} containing reading.
+ * </ul>
+ * <p>
+ * This tokenizer uses a rolling Viterbi search to find the
+ * least cost segmentation (path) of the incoming characters.
+ * @lucene.experimental
+ */
+public final class KoreanTokenizer extends Tokenizer {
+
+  /**
+   * Token type reflecting the original source of this token
+   */
+  public enum Type {
+    /**
+     * Known words from the system dictionary.
+     */
+    KNOWN,
+    /**
+     * Unknown words (heuristically segmented).
+     */
+    UNKNOWN,
+    /**
+     * Known words from the user dictionary.
+     */
+    USER
+  }
+
+  /**
+   * Decompound mode: this determines how the tokenizer handles
+   * {@link POS.Type#COMPOUND}, {@link POS.Type#INFLECT} and {@link POS.Type#PREANALYSIS} tokens.
+   */
+  public enum DecompoundMode {
+    /**
+     * No decomposition for compound.
+     */
+    NONE,
+
+    /**
+     * Decomposes compounds and discards the original form (default).
+     */
+    DISCARD,
+
+    /**
+     * Decomposes compounds and keeps the original form.
+     */
+    MIXED
+  }
+
+  /**
+   * Default decompound mode for tokens ({@link DecompoundMode#DISCARD}).
+   */
+  public static final DecompoundMode DEFAULT_DECOMPOUND = DecompoundMode.DISCARD;
+
+  private static final boolean VERBOSE = false;
+
+  // For safety:
+  private static final int MAX_UNKNOWN_WORD_LENGTH = 1024;
+  private static final int MAX_BACKTRACE_GAP = 1024;
+
+  private final EnumMap<Type, Dictionary> dictionaryMap = new EnumMap<>(Type.class);
+
+  private final TokenInfoFST fst;
+  private final TokenInfoDictionary dictionary;
+  private final UnknownDictionary unkDictionary;
+  private final ConnectionCosts costs;
+  private final UserDictionary userDictionary;
+  private final CharacterDefinition characterDefinition;
+
+  private final FST.Arc<Long> arc = new FST.Arc<>();
+  private final FST.BytesReader fstReader;
+  private final IntsRef wordIdRef = new IntsRef();
+
+  private final FST.BytesReader userFSTReader;
+  private final TokenInfoFST userFST;
+
+  private final DecompoundMode mode;
+  private final boolean outputUnknownUnigrams;
+
+  private final RollingCharBuffer buffer = new RollingCharBuffer();
+
+  private final WrappedPositionArray positions = new WrappedPositionArray();
+
+  // True once we've hit the EOF from the input reader:
+  private boolean end;
+
+  // Last absolute position we backtraced from:
+  private int lastBackTracePos;
+
+  // Next absolute position to process:
+  private int pos;
+
+  // Already parsed, but not yet passed to caller, tokens:
+  private final List<Token> pending = new ArrayList<>();
+
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+  private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);
+  private final PartOfSpeechAttribute posAtt = addAttribute(PartOfSpeechAttribute.class);
+  private final ReadingAttribute readingAtt = addAttribute(ReadingAttribute.class);
+
+
+  /**
+   * Creates a new KoreanTokenizer with default parameters.
+   * <p>
+   * Uses the default AttributeFactory.
+   */
+  public KoreanTokenizer() {
+    this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, null, DEFAULT_DECOMPOUND, false);
+  }
+
+  /**
+   * Create a new KoreanTokenizer.
+   *
+   * @param factory the AttributeFactory to use
+   * @param userDictionary Optional: if non-null, user dictionary.
+   * @param mode Decompound mode.
+   * @param outputUnknownUnigrams If true outputs unigrams for unknown words.
+   */
+  public KoreanTokenizer(AttributeFactory factory, UserDictionary userDictionary, DecompoundMode mode, boolean outputUnknownUnigrams) {
+    super(factory);
+    this.mode = mode;
+    this.outputUnknownUnigrams = outputUnknownUnigrams;
+    dictionary = TokenInfoDictionary.getInstance();
+    fst = dictionary.getFST();
+    unkDictionary = UnknownDictionary.getInstance();
+    characterDefinition = unkDictionary.getCharacterDefinition();
+    this.userDictionary = userDictionary;
+    costs = ConnectionCosts.getInstance();
+    fstReader = fst.getBytesReader();
+    if (userDictionary != null) {
+      userFST = userDictionary.getFST();
+      userFSTReader = userFST.getBytesReader();
+    } else {
+      userFST = null;
+      userFSTReader = null;
+    }
+
+    buffer.reset(this.input);
+
+    resetState();
+
+    dictionaryMap.put(Type.KNOWN, dictionary);
+    dictionaryMap.put(Type.UNKNOWN, unkDictionary);
+    dictionaryMap.put(Type.USER, userDictionary);
+  }
+
+  private GraphvizFormatter dotOut;
+
+  /** Expert: set this to produce graphviz (dot) output of
+   *  the Viterbi lattice */
+  public void setGraphvizFormatter(GraphvizFormatter dotOut) {
+    this.dotOut = dotOut;
+  }
+
+  @Override
+  public void close() throws IOException {
+    super.close();
+    buffer.reset(input);
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    buffer.reset(input);
+    resetState();
+  }
+
+  private void resetState() {
+    positions.reset();
+    pos = 0;
+    end = false;
+    lastBackTracePos = 0;
+    pending.clear();
+
+    // Add BOS:
+    positions.get(0).add(0, 0, -1, -1, -1, -1, Type.KNOWN);
+  }
+
+  @Override
+  public void end() throws IOException {
+    super.end();
+    // Set final offset
+    int finalOffset = correctOffset(pos);
+    offsetAtt.setOffset(finalOffset, finalOffset);
+  }
+
+  // Holds all back pointers arriving at this position:
+  final static class Position {
+
+    int pos;
+
+    int count;
+
+    // maybe single int array * 5?
+    int[] costs = new int[8];
+    int[] lastRightID = new int[8];
+    int[] backPos = new int[8];
+    int[] backWordPos = new int[8];
+    int[] backIndex = new int[8];
+    int[] backID = new int[8];
+    Type[] backType = new Type[8];
+
+    public void grow() {
+      costs = ArrayUtil.grow(costs, 1+count);
+      lastRightID = ArrayUtil.grow(lastRightID, 1+count);
+      backPos = ArrayUtil.grow(backPos, 1+count);
+      backWordPos = ArrayUtil.grow(backWordPos, 1+count);
+      backIndex = ArrayUtil.grow(backIndex, 1+count);
+      backID = ArrayUtil.grow(backID, 1+count);
+
+      // NOTE: sneaky: grow separately because
+      // ArrayUtil.grow will otherwise pick a different
+      // length than the int[]s we just grew:
+      final Type[] newBackType = new Type[backID.length];
+      System.arraycopy(backType, 0, newBackType, 0, backType.length);
+      backType = newBackType;
+    }
+
+    public void add(int cost, int lastRightID, int backPos, int backRPos, int backIndex, int backID, Type backType) {
+      // NOTE: this isn't quite a true Viterbi search,
+      // because we should check if lastRightID is
+      // already present here, and only update if the new
+      // cost is less than the current cost, instead of
+      // simply appending.  However, that will likely hurt
+      // performance (usually we add a lastRightID only once),
+      // and it means we actually create the full graph
+      // intersection instead of a "normal" Viterbi lattice:
+      if (count == costs.length) {
+        grow();
+      }
+      this.costs[count] = cost;
+      this.lastRightID[count] = lastRightID;
+      this.backPos[count] = backPos;
+      this.backWordPos[count] = backRPos;
+      this.backIndex[count] = backIndex;
+      this.backID[count] = backID;
+      this.backType[count] = backType;
+      count++;
+    }
+
+    public void reset() {
+      count = 0;
+    }
+  }
+
+  /**
+   * Returns the space penalty associated with the provided {@link POS.Tag}.
+   *
+   * @param leftPOS the left part of speech of the current token.
+   * @param numSpaces the number of spaces before the current token.
+   */
+  private int computeSpacePenalty(POS.Tag leftPOS, int numSpaces) {
+    int spacePenalty = 0;
+    if (numSpaces > 0) {
+      // TODO we should extract the penalty (left-space-penalty-factor) from the dicrc file.
+      switch (leftPOS) {
+        case E:
+        case J:
+        case VCP:
+        case XSA:
+        case XSN:
+        case XSV:
+          spacePenalty = 3000;
+          break;
+
+        default:
+          break;
+      }
+    }
+    return spacePenalty;
+
+  }
+
+  private void add(Dictionary dict, Position fromPosData, int wordPos, int endPos, int wordID, Type type) throws IOException {
+    final POS.Tag leftPOS = dict.getLeftPOS(wordID);
+    final int wordCost = dict.getWordCost(wordID);
+    final int leftID = dict.getLeftId(wordID);
+    int leastCost = Integer.MAX_VALUE;
+    int leastIDX = -1;
+    assert fromPosData.count > 0;
+    for(int idx=0;idx<fromPosData.count;idx++) {
+      // The number of spaces before the term
+      int numSpaces = wordPos - fromPosData.pos;
+
+      // Cost is path cost so far, plus word cost (added at
+      // end of loop), plus bigram cost and space penalty cost.
+      final int cost = fromPosData.costs[idx] + costs.get(fromPosData.lastRightID[idx], leftID) + computeSpacePenalty(leftPOS, numSpaces);
+      if (VERBOSE) {
+        System.out.println("      fromIDX=" + idx + ": cost=" + cost + " (prevCost=" + fromPosData.costs[idx] + " wordCost=" + wordCost + " bgCost=" + costs.get(fromPosData.lastRightID[idx], leftID) +
+            " spacePenalty=" + computeSpacePenalty(leftPOS, numSpaces) + ") leftID=" + leftID + " leftPOS=" + leftPOS.name() + ")");
+      }
+      if (cost < leastCost) {
+        leastCost = cost;
+        leastIDX = idx;
+        if (VERBOSE) {
+          System.out.println("        **");
+        }
+      }
+    }
+
+    leastCost += wordCost;
+
+    if (VERBOSE) {
+      System.out.println("      + cost=" + leastCost + " wordID=" + wordID + " leftID=" + leftID + " leastIDX=" + leastIDX + " toPos=" + endPos + " toPos.idx=" + positions.get(endPos).count);
+    }
+
+    positions.get(endPos).add(leastCost, dict.getRightId(wordID), fromPosData.pos, wordPos, leastIDX, wordID, type);
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+
+    // parse() is able to return w/o producing any new
+    // tokens, when the tokens it had produced were entirely
+    // punctuation.  So we loop here until we get a real
+    // token or we end:
+    while (pending.size() == 0) {
+      if (end) {
+        return false;
+      }
+
+      // Push Viterbi forward some more:
+      parse();
+    }
+
+    final Token token = pending.remove(pending.size()-1);
+
+    int length = token.getLength();
+    clearAttributes();
+    assert length > 0;
+    //System.out.println("off=" + token.getOffset() + " len=" + length + " vs " + token.getSurfaceForm().length);
+    termAtt.copyBuffer(token.getSurfaceForm(), token.getOffset(), length);
+    offsetAtt.setOffset(correctOffset(token.getStartOffset()), correctOffset(token.getEndOffset()));
+    posAtt.setToken(token);
+    readingAtt.setToken(token);
+    posIncAtt.setPositionIncrement(token.getPositionIncrement());
+    posLengthAtt.setPositionLength(token.getPositionLength());
+    if (VERBOSE) {
+      System.out.println(Thread.currentThread().getName() + ":    incToken: return token=" + token);
+    }
+    return true;
+  }
+
+  // TODO: make generic'd version of this "circular array"?
+  // It's a bit tricky because we do things to the Position
+  // (eg, set .pos = N on reuse)...
+  static final class WrappedPositionArray {
+    private Position[] positions = new Position[8];
+
+    public WrappedPositionArray() {
+      for(int i=0;i<positions.length;i++) {
+        positions[i] = new Position();
+      }
+    }
+
+    // Next array index to write to in positions:
+    private int nextWrite;
+
+    // Next position to write:
+    private int nextPos;
+
+    // How many valid Position instances are held in the
+    // positions array:
+    private int count;
+
+    public void reset() {
+      nextWrite--;
+      while(count > 0) {
+        if (nextWrite == -1) {
+          nextWrite = positions.length - 1;
+        }
+        positions[nextWrite--].reset();
+        count--;
+      }
+      nextWrite = 0;
+      nextPos = 0;
+      count = 0;
+    }
+
+    /** Get Position instance for this absolute position;
+     *  this is allowed to be arbitrarily far "in the
+     *  future" but cannot be before the last freeBefore. */
+    public Position get(int pos) {
+      while(pos >= nextPos) {
+        //System.out.println("count=" + count + " vs len=" + positions.length);
+        if (count == positions.length) {
+          Position[] newPositions = new Position[ArrayUtil.oversize(1+count, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+          //System.out.println("grow positions " + newPositions.length);
+          System.arraycopy(positions, nextWrite, newPositions, 0, positions.length-nextWrite);
+          System.arraycopy(positions, 0, newPositions, positions.length-nextWrite, nextWrite);
+          for(int i=positions.length;i<newPositions.length;i++) {
+            newPositions[i] = new Position();
+          }
+          nextWrite = positions.length;
+          positions = newPositions;
+        }
+        if (nextWrite == positions.length) {
+          nextWrite = 0;
+        }
+        // Should have already been reset:
+        assert positions[nextWrite].count == 0;
+        positions[nextWrite++].pos = nextPos++;
+        count++;
+      }
+      assert inBounds(pos);
+      final int index = getIndex(pos);
+      assert positions[index].pos == pos;
+      return positions[index];
+    }
+
+    public int getNextPos() {
+      return nextPos;
+    }
+
+    // For assert:
+    private boolean inBounds(int pos) {
+      return pos < nextPos && pos >= nextPos - count;
+    }
+
+    private int getIndex(int pos) {
+      int index = nextWrite - (nextPos - pos);
+      if (index < 0) {
+        index += positions.length;
+      }
+      return index;
+    }
+
+    public void freeBefore(int pos) {
+      final int toFree = count - (nextPos - pos);
+      assert toFree >= 0;
+      assert toFree <= count;
+      int index = nextWrite - count;
+      if (index < 0) {
+        index += positions.length;
+      }
+      for(int i=0;i<toFree;i++) {
+        if (index == positions.length) {
+          index = 0;
+        }
+        //System.out.println("  fb idx=" + index);
+        positions[index].reset();
+        index++;
+      }
+      count -= toFree;
+    }
+  }
+
+  /* Incrementally parse some more characters.  This runs
+   * the viterbi search forwards "enough" so that we
+   * generate some more tokens.  How much forward depends on
+   * the chars coming in, since some chars could cause
+   * longer-lasting ambiguity in the parsing.  Once the
+   * ambiguity is resolved, then we back trace, produce
+   * the pending tokens, and return. */
+  private void parse() throws IOException {
+    if (VERBOSE) {
+      System.out.println("\nPARSE");
+    }
+
+    // Index of the last character of unknown word:
+    int unknownWordEndIndex = -1;
+
+    // Advances over each position (character):
+    while (true) {
+
+      if (buffer.get(pos) == -1) {
+        // End
+        break;
+      }
+
+      final Position posData = positions.get(pos);
+      final boolean isFrontier = positions.getNextPos() == pos+1;
+
+      if (posData.count == 0) {
+        // No arcs arrive here; move to next position:
+        if (VERBOSE) {
+          System.out.println("    no arcs in; skip pos=" + pos);
+        }
+        pos++;
+        continue;
+      }
+
+      if (pos > lastBackTracePos && posData.count == 1 && isFrontier) {
+        //  if (pos > lastBackTracePos && posData.count == 1 && isFrontier) {
+        // We are at a "frontier", and only one node is
+        // alive, so whatever the eventual best path is must
+        // come through this node.  So we can safely commit
+        // to the prefix of the best path at this point:
+        backtrace(posData, 0);
+
+        // Re-base cost so we don't risk int overflow:
+        posData.costs[0] = 0;
+        if (pending.size() > 0) {
+          return;
+        } else {
+          // This means the backtrace only produced
+          // punctuation tokens, so we must keep parsing.
+        }
+      }
+
+      if (pos - lastBackTracePos >= MAX_BACKTRACE_GAP) {
+        // Safety: if we've buffered too much, force a
+        // backtrace now.  We find the least-cost partial
+        // path, across all paths, backtrace from it, and
+        // then prune all others.  Note that this, in
+        // general, can produce the wrong result, if the
+        // total best path did not in fact back trace
+        // through this partial best path.  But it's the
+        // best we can do... (short of not having a
+        // safety!).
+
+        // First pass: find least cost partial path so far,
+        // including ending at future positions:
+        int leastIDX = -1;
+        int leastCost = Integer.MAX_VALUE;
+        Position leastPosData = null;
+        for(int pos2=pos;pos2<positions.getNextPos();pos2++) {
+          final Position posData2 = positions.get(pos2);
+          for(int idx=0;idx<posData2.count;idx++) {
+            //System.out.println("    idx=" + idx + " cost=" + cost);
+            final int cost = posData2.costs[idx];
+            if (cost < leastCost) {
+              leastCost = cost;
+              leastIDX = idx;
+              leastPosData = posData2;
+            }
+          }
+        }
+
+        // We will always have at least one live path:
+        assert leastIDX != -1;
+
+        // Second pass: prune all but the best path:
+        for(int pos2=pos;pos2<positions.getNextPos();pos2++) {
+          final Position posData2 = positions.get(pos2);
+          if (posData2 != leastPosData) {
+            posData2.reset();
+          } else {
+            if (leastIDX != 0) {
+              posData2.costs[0] = posData2.costs[leastIDX];
+              posData2.lastRightID[0] = posData2.lastRightID[leastIDX];
+              posData2.backPos[0] = posData2.backPos[leastIDX];
+              posData2.backWordPos[0] = posData2.backWordPos[leastIDX];
+              posData2.backIndex[0] = posData2.backIndex[leastIDX];
+              posData2.backID[0] = posData2.backID[leastIDX];
+              posData2.backType[0] = posData2.backType[leastIDX];
+            }
+            posData2.count = 1;
+          }
+        }
+
+        backtrace(leastPosData, 0);
+
+        // Re-base cost so we don't risk int overflow:
+        Arrays.fill(leastPosData.costs, 0, leastPosData.count, 0);
+
+        if (pos != leastPosData.pos) {
+          // We jumped into a future position:
+          assert pos < leastPosData.pos;
+          pos = leastPosData.pos;
+        }
+        if (pending.size() > 0) {
+          return;
+        } else {
+          // This means the backtrace only produced
+          // punctuation tokens, so we must keep parsing.
+        }
+      }
+
+      if (VERBOSE) {
+        System.out.println("\n  extend @ pos=" + pos + " char=" + (char) buffer.get(pos) + " hex=" + Integer.toHexString(buffer.get(pos)));
+      }
+
+      if (VERBOSE) {
+        System.out.println("    " + posData.count + " arcs in");
+      }
+
+      // Move to the first character that is not a whitespace.
+      // The whitespaces are added as a prefix to the term that we extract;
+      // this information is then used when computing the cost of the term
+      // using the space penalty factor.
+      // They are removed when the final tokens are generated.
+      if (Character.getType(buffer.get(pos)) == Character.SPACE_SEPARATOR) {
+        int nextChar = buffer.get(++pos);
+        while (nextChar != -1 && Character.getType(nextChar) == Character.SPACE_SEPARATOR) {
+          pos ++;
+          nextChar = buffer.get(pos);
+        }
+      }
+      if (buffer.get(pos) == -1) {
+        pos = posData.pos;
+      }
+
+      boolean anyMatches = false;
+
+      // First try user dict:
+      if (userFST != null) {
+        userFST.getFirstArc(arc);
+        int output = 0;
+        for(int posAhead=pos;;posAhead++) {
+          final int ch = buffer.get(posAhead);
+          if (ch == -1) {
+            break;
+          }
+          if (userFST.findTargetArc(ch, arc, arc, posAhead == pos, userFSTReader) == null) {
+            break;
+          }
+          output += arc.output.intValue();
+          if (arc.isFinal()) {
+            if (VERBOSE) {
+              System.out.println("    USER word " + new String(buffer.get(pos, posAhead - pos + 1)) + " toPos=" + (posAhead + 1));
+            }
+            add(userDictionary, posData, pos, posAhead+1, output + arc.nextFinalOutput.intValue(), Type.USER);
+            anyMatches = true;
+          }
+        }
+      }
+
+      // TODO: we can be more aggressive about user
+      // matches?  if we are "under" a user match then don't
+      // extend KNOWN/UNKNOWN paths?
+
+      if (!anyMatches) {
+        // Next, try known dictionary matches
+        fst.getFirstArc(arc);
+        int output = 0;
+
+        for(int posAhead=pos;;posAhead++) {
+          final int ch = buffer.get(posAhead);
+          if (ch == -1) {
+            break;
+          }
+          //System.out.println("    match " + (char) ch + " posAhead=" + posAhead);
+
+          if (fst.findTargetArc(ch, arc, arc, posAhead == pos, fstReader) == null) {
+            break;
+          }
+
+          output += arc.output.intValue();
+
+          // Optimization: for known words that are too-long
+          // (compound), we should pre-compute the 2nd
+          // best segmentation and store it in the
+          // dictionary instead of recomputing it each time a
+          // match is found.
+
+          if (arc.isFinal()) {
+            dictionary.lookupWordIds(output + arc.nextFinalOutput.intValue(), wordIdRef);
+            if (VERBOSE) {
+              System.out.println("    KNOWN word " + new String(buffer.get(pos, posAhead - pos + 1)) + " toPos=" + (posAhead + 1) + " " + wordIdRef.length + " wordIDs");
+            }
+            for (int ofs = 0; ofs < wordIdRef.length; ofs++) {
+              add(dictionary, posData, pos, posAhead+1, wordIdRef.ints[wordIdRef.offset + ofs], Type.KNOWN);
+              anyMatches = true;
+            }
+          }
+        }
+      }
+
+      if (unknownWordEndIndex > posData.pos) {
+        pos++;
+        continue;
+      }
+
+      final char firstCharacter = (char) buffer.get(pos);
+      if (!anyMatches || characterDefinition.isInvoke(firstCharacter)) {
+
+        // Find unknown match:
+        final int characterId = characterDefinition.getCharacterClass(firstCharacter);
+        final boolean isPunct = isPunctuation(firstCharacter);
+
+        // NOTE: copied from UnknownDictionary.lookup:
+        int unknownWordLength;
+        if (!characterDefinition.isGroup(firstCharacter)) {
+          unknownWordLength = 1;
+        } else {
+          // Extract unknown word. Characters with the same character class are considered to be part of unknown word
+          unknownWordLength = 1;
+          for (int posAhead = pos + 1; unknownWordLength < MAX_UNKNOWN_WORD_LENGTH; posAhead++) {
+            final int ch = buffer.get(posAhead);
+            if (ch == -1) {
+              break;
+            }
+            if (characterId == characterDefinition.getCharacterClass((char) ch) &&
+                isPunctuation((char) ch) == isPunct) {
+              unknownWordLength++;
+            } else {
+              break;
+            }
+          }
+        }
+
+        unkDictionary.lookupWordIds(characterId, wordIdRef); // characters in input text are supposed to be the same
+        if (VERBOSE) {
+          System.out.println("    UNKNOWN word len=" + unknownWordLength + " " + wordIdRef.length + " wordIDs");
+        }
+        for (int ofs = 0; ofs < wordIdRef.length; ofs++) {
+          add(unkDictionary, posData, pos, pos + unknownWordLength, wordIdRef.ints[wordIdRef.offset + ofs], Type.UNKNOWN);
+        }
+      }
+
+      pos++;
+    }
+
+    end = true;
+
+    if (pos > 0) {
+
+      final Position endPosData = positions.get(pos);
+      int leastCost = Integer.MAX_VALUE;
+      int leastIDX = -1;
+      if (VERBOSE) {
+        System.out.println("  end: " + endPosData.count + " nodes");
+      }
+      for(int idx=0;idx<endPosData.count;idx++) {
+        // Add EOS cost:
+        final int cost = endPosData.costs[idx] + costs.get(endPosData.lastRightID[idx], 0);
+        //System.out.println("    idx=" + idx + " cost=" + cost + " (pathCost=" + endPosData.costs[idx] + " bgCost=" + costs.get(endPosData.lastRightID[idx], 0) + ") backPos=" + endPosData.backPos[idx]);
+        if (cost < leastCost) {
+          leastCost = cost;
+          leastIDX = idx;
+        }
+      }
+
+      backtrace(endPosData, leastIDX);
+    } else {
+      // No characters in the input string; return no tokens!
+    }
+  }
+
+  // Backtrace from the provided position, back to the last
+  // time we back-traced, accumulating the resulting tokens to
+  // the pending list.  The pending list is then in-reverse
+  // (last token should be returned first).
+  private void backtrace(final Position endPosData, final int fromIDX) {
+    final int endPos = endPosData.pos;
+
+    if (VERBOSE) {
+      System.out.println("\n  backtrace: endPos=" + endPos + " pos=" + pos + "; " + (pos - lastBackTracePos) + " characters; last=" + lastBackTracePos + " cost=" + endPosData.costs[fromIDX]);
+    }
+
+    final char[] fragment = buffer.get(lastBackTracePos, endPos-lastBackTracePos);
+
+    if (dotOut != null) {
+      dotOut.onBacktrace(this, positions, lastBackTracePos, endPosData, fromIDX, fragment, end);
+    }
+
+    int pos = endPos;
+    int bestIDX = fromIDX;
+
+    // TODO: sort of silly to make Token instances here; the
+    // back trace has all info needed to generate the
+    // token.  So, we could just directly set the attrs,
+    // from the backtrace, in incrementToken w/o ever
+    // creating Token; we'd have to defer calling freeBefore
+    // until after the backtrace was fully "consumed" by
+    // incrementToken.
+
+    while (pos > lastBackTracePos) {
+      //System.out.println("BT: back pos=" + pos + " bestIDX=" + bestIDX);
+      final Position posData = positions.get(pos);
+      assert bestIDX < posData.count;
+
+      int backPos = posData.backPos[bestIDX];
+      int backWordPos = posData.backWordPos[bestIDX];
+      assert backPos >= lastBackTracePos: "backPos=" + backPos + " vs lastBackTracePos=" + lastBackTracePos;
+      // the length of the word without the whitespaces at the beginning.
+      int length = pos - backWordPos;
+      Type backType = posData.backType[bestIDX];
+      int backID = posData.backID[bestIDX];
+      int nextBestIDX = posData.backIndex[bestIDX];
+      // the start of the word after the whitespace at the beginning.
+      final int fragmentOffset = backWordPos - lastBackTracePos;
+      assert fragmentOffset >= 0;
+
+      final Dictionary dict = getDict(backType);
+
+      if (outputUnknownUnigrams && backType == Type.UNKNOWN) {
+        // outputUnknownUnigrams converts unknown word into unigrams:
+        for (int i = length - 1; i >= 0; i--) {
+          int charLen = 1;
+          if (i > 0 && Character.isLowSurrogate(fragment[fragmentOffset + i])) {
+            i--;
+            charLen = 2;
+          }
+          final DictionaryToken token = new DictionaryToken(Type.UNKNOWN,
+              unkDictionary,
+              CharacterDefinition.NGRAM,
+              fragment,
+              fragmentOffset+i,
+              charLen,
+              backWordPos+i,
+              backWordPos+i+charLen
+          );
+          if (shouldFilterToken(token) == false) {
+            pending.add(token);
+            if (VERBOSE) {
+              System.out.println("    add token=" + pending.get(pending.size() - 1));
+            }
+          }
+        }
+      } else {
+        final DictionaryToken token = new DictionaryToken(backType,
+            dict,
+            backID,
+            fragment,
+            fragmentOffset,
+            length,
+            backWordPos,
+            backWordPos + length
+        );
+        if (token.getPOSType() == POS.Type.MORPHEME || mode == DecompoundMode.NONE) {
+          if (shouldFilterToken(token) == false) {
+            pending.add(token);
+            if (VERBOSE) {
+              System.out.println("    add token=" + pending.get(pending.size() - 1));
+            }
+          }
+        } else {
+          Dictionary.Morpheme[] morphemes = token.getMorphemes();
+          if (morphemes == null) {
+            pending.add(token);
+            if (VERBOSE) {
+              System.out.println("    add token=" + pending.get(pending.size() - 1));
+            }
+          } else {
+            int endOffset = backWordPos + length;
+            int posLen = 0;
+            // decompose the compound
+            for (int i = morphemes.length - 1; i >= 0; i--) {
+              final Dictionary.Morpheme morpheme = morphemes[i];
+              final Token compoundToken;
+              if (token.getPOSType() == POS.Type.COMPOUND) {
+                assert endOffset - morpheme.surfaceForm.length() >= 0;
+                compoundToken = new DecompoundToken(morpheme.posTag, morpheme.surfaceForm,
+                    endOffset - morpheme.surfaceForm.length(), endOffset);
+              } else {
+                compoundToken = new DecompoundToken(morpheme.posTag, morpheme.surfaceForm, token.getStartOffset(), token.getEndOffset());
+              }
+              if (i == 0 && mode == DecompoundMode.MIXED) {
+                compoundToken.setPositionIncrement(0);
+              }
+              ++ posLen;
+              endOffset -= morpheme.surfaceForm.length();
+              pending.add(compoundToken);
+              if (VERBOSE) {
+                System.out.println("    add token=" + pending.get(pending.size() - 1));
+              }
+            }
+            if (mode == DecompoundMode.MIXED) {
+              token.setPositionLength(Math.max(1, posLen));
+              pending.add(token);
+              if (VERBOSE) {
+                System.out.println("    add token=" + pending.get(pending.size() - 1));
+              }
+            }
+          }
+        }
+      }
+
+      pos = backPos;
+      bestIDX = nextBestIDX;
+    }
+
+    lastBackTracePos = endPos;
+
+    if (VERBOSE) {
+      System.out.println("  freeBefore pos=" + endPos);
+    }
+    // Notify the circular buffers that we are done with
+    // these positions:
+    buffer.freeBefore(endPos);
+    positions.freeBefore(endPos);
+  }
+
+  Dictionary getDict(Type type) {
+    return dictionaryMap.get(type);
+  }
+
+  private boolean shouldFilterToken(Token token) {
+    return isPunctuation(token.getSurfaceForm()[token.getOffset()]);
+  }
+
+  private static boolean isPunctuation(char ch) {
+    switch(Character.getType(ch)) {
+      case Character.SPACE_SEPARATOR:
+      case Character.LINE_SEPARATOR:
+      case Character.PARAGRAPH_SEPARATOR:
+      case Character.CONTROL:
+      case Character.FORMAT:
+      case Character.DASH_PUNCTUATION:
+      case Character.START_PUNCTUATION:
+      case Character.END_PUNCTUATION:
+      case Character.CONNECTOR_PUNCTUATION:
+      case Character.OTHER_PUNCTUATION:
+      case Character.MATH_SYMBOL:
+      case Character.CURRENCY_SYMBOL:
+      case Character.MODIFIER_SYMBOL:
+      case Character.OTHER_SYMBOL:
+      case Character.INITIAL_QUOTE_PUNCTUATION:
+      case Character.FINAL_QUOTE_PUNCTUATION:
+        return true;
+      default:
+        return false;
+    }
+  }
+}
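
To tie the pieces together, a hedged consumption sketch (not part of the commit) showing the four-argument constructor, a DecompoundMode choice, and the attributes the class Javadoc mentions; the sample sentence comes from the tests below, everything else is illustrative:

    import java.io.IOException;
    import java.io.StringReader;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.ko.KoreanTokenizer;
    import org.apache.lucene.analysis.ko.tokenattributes.PartOfSpeechAttribute;
    import org.apache.lucene.analysis.ko.tokenattributes.ReadingAttribute;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class KoreanTokenizerSketch {
      public static void main(String[] args) throws IOException {
        // No user dictionary, MIXED decompounding (keep compounds and their parts),
        // and no unigrams for unknown words.
        try (KoreanTokenizer tokenizer = new KoreanTokenizer(
            TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, null,
            KoreanTokenizer.DecompoundMode.MIXED, false)) {
          tokenizer.setReader(new StringReader("한국은 대단한 나라입니다."));
          CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
          PartOfSpeechAttribute pos = tokenizer.addAttribute(PartOfSpeechAttribute.class);
          ReadingAttribute reading = tokenizer.addAttribute(ReadingAttribute.class);
          tokenizer.reset();
          while (tokenizer.incrementToken()) {
            // Print each segment with its left part-of-speech tag and (possibly null) reading.
            System.out.println(term + " pos=" + pos.getLeftPOS() + " reading=" + reading.getReading());
          }
          tokenizer.end();
        }
      }
    }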


[42/46] lucene-solr:jira/solr-11833: SOLR-12028: Remove BadApple for CollectionsAPIDistributedZkTest.testCollectionReload()

Posted by ab...@apache.org.
SOLR-12028: Remove BadApple for CollectionsAPIDistributedZkTest.testCollectionReload()


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/3028f3e9
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/3028f3e9
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/3028f3e9

Branch: refs/heads/jira/solr-11833
Commit: 3028f3e9ea337f9b8b0d021ecf3f2cb54217c566
Parents: dad2d10
Author: Cao Manh Dat <da...@apache.org>
Authored: Mon Apr 16 10:26:22 2018 +0700
Committer: Cao Manh Dat <da...@apache.org>
Committed: Mon Apr 16 10:26:22 2018 +0700

----------------------------------------------------------------------
 .../solr/cloud/api/collections/CollectionsAPIDistributedZkTest.java | 1 -
 1 file changed, 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3028f3e9/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIDistributedZkTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIDistributedZkTest.java
index f970d4b..2901961 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIDistributedZkTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/CollectionsAPIDistributedZkTest.java
@@ -483,7 +483,6 @@ public class CollectionsAPIDistributedZkTest extends SolrCloudTestCase {
   }
 
   @Test
-  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 26-Mar-2018
   public void testCollectionReload() throws Exception {
 
     final String collectionName = "reloaded_collection";


[35/46] lucene-solr:jira/solr-11833: Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr

Posted by ab...@apache.org.
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/8462b134
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/8462b134
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/8462b134

Branch: refs/heads/jira/solr-11833
Commit: 8462b134eaf9168d0ba48d7324db04c5441bce7a
Parents: d78c354 12bd5f9
Author: Karl Wright <Da...@gmail.com>
Authored: Fri Apr 13 12:28:03 2018 -0400
Committer: Karl Wright <Da...@gmail.com>
Committed: Fri Apr 13 12:28:03 2018 -0400

----------------------------------------------------------------------
 .../search/spell/LuceneLevenshteinDistance.java |   8 ++
 solr/CHANGES.txt                                |   5 +
 .../handler/dataimport/DataImportHandler.java   |  37 +++----
 .../solr/response/TextResponseWriter.java       |  27 ++---
 .../spelling/ConjunctionSolrSpellChecker.java   |   3 +-
 .../ConjunctionSolrSpellCheckerTest.java        |  35 +++++--
 .../common/params/ModifiableSolrParams.java     |  20 ++--
 .../solr/common/params/MultiMapSolrParams.java  |  11 +-
 .../apache/solr/common/params/SolrParams.java   | 102 +++++++++++++++----
 .../solr/common/params/SolrParamTest.java       |  80 +++++++++++----
 10 files changed, 239 insertions(+), 89 deletions(-)
----------------------------------------------------------------------



[25/46] lucene-solr:jira/solr-11833: LUCENE-8231: Add a new analysis module (nori) similar to Kuromoji but to handle Korean

Posted by ab...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanAnalyzer.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanAnalyzer.java
new file mode 100644
index 0000000..fd574ce
--- /dev/null
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanAnalyzer.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Random;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+
+/**
+ * Test Korean morphological analyzer
+ */
+public class TestKoreanAnalyzer extends BaseTokenStreamTestCase {
+  public void testSentence() throws IOException {
+    Analyzer a = new KoreanAnalyzer();
+    assertAnalyzesTo(a, "한국은 대단한 나라입니다.",
+        new String[]{"한국", "대단", "나라", "이"},
+        new int[]{ 0, 4, 8, 10 },
+        new int[]{ 2, 6, 10, 13 },
+        new int[]{ 1, 2, 3, 1 }
+    );
+    a.close();
+  }
+
+  public void testStopTags() throws IOException {
+    Set<POS.Tag> stopTags = Arrays.asList(POS.Tag.NNP, POS.Tag.NNG).stream().collect(Collectors.toSet());
+    Analyzer a = new KoreanAnalyzer(null, KoreanTokenizer.DecompoundMode.DISCARD, stopTags, false);
+    assertAnalyzesTo(a, "한국은 대단한 나라입니다.",
+        new String[]{"은", "대단", "하", "ᆫ", "이", "ᄇ니다"},
+        new int[]{ 2, 4, 6, 6, 10, 10 },
+        new int[]{ 3, 6, 7, 7, 13, 13 },
+        new int[]{ 2, 1, 1, 1, 2, 1 }
+    );
+    a.close();
+  }
+
+  public void testUnknownWord() throws IOException {
+    Analyzer a = new KoreanAnalyzer(null, KoreanTokenizer.DecompoundMode.DISCARD,
+        KoreanPartOfSpeechStopFilter.DEFAULT_STOP_TAGS, true);
+
+    assertAnalyzesTo(a,"2018 평창 동계올림픽대회",
+        new String[]{"2", "0", "1", "8", "평창", "동계", "올림픽", "대회"},
+        new int[]{0, 1, 2, 3, 5, 8, 10, 13},
+        new int[]{1, 2, 3, 4, 7, 10, 13, 15},
+        new int[]{1, 1, 1, 1, 1, 1, 1, 1});
+    a.close();
+
+    a = new KoreanAnalyzer(null, KoreanTokenizer.DecompoundMode.DISCARD,
+        KoreanPartOfSpeechStopFilter.DEFAULT_STOP_TAGS, false);
+
+    assertAnalyzesTo(a,"2018 평창 동계올림픽대회",
+        new String[]{"2018", "평창", "동계", "올림픽", "대회"},
+        new int[]{0, 5, 8, 10, 13},
+        new int[]{4, 7, 10, 13, 15},
+        new int[]{1, 1, 1, 1, 1});
+    a.close();
+  }
+
+  /**
+   * blast random strings against the analyzer
+   */
+  public void testRandom() throws IOException {
+    Random random = random();
+    final Analyzer a = new KoreanAnalyzer();
+    checkRandomData(random, a, atLeast(1000));
+    a.close();
+  }
+
+  /**
+   * blast some random large strings through the analyzer
+   */
+  public void testRandomHugeStrings() throws Exception {
+    Random random = random();
+    final Analyzer a = new KoreanAnalyzer();
+    checkRandomData(random, a, 2 * RANDOM_MULTIPLIER, 8192);
+    a.close();
+  }
+
+  // Copied from TestKoreanTokenizer, to make sure passing
+  // user dict to analyzer works:
+  public void testUserDict() throws IOException {
+    final Analyzer analyzer = new KoreanAnalyzer(TestKoreanTokenizer.readDict(),
+        KoreanTokenizer.DEFAULT_DECOMPOUND, KoreanPartOfSpeechStopFilter.DEFAULT_STOP_TAGS, false);
+    assertAnalyzesTo(analyzer, "c++ 프로그래밍 언어",
+        new String[]{"c++", "프로그래밍", "언어"},
+        new int[]{0, 4, 10},
+        new int[]{3, 9, 12},
+        new int[]{1, 1, 1}
+    );
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanPartOfSpeechStopFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanPartOfSpeechStopFilterFactory.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanPartOfSpeechStopFilterFactory.java
new file mode 100644
index 0000000..5486f3f
--- /dev/null
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanPartOfSpeechStopFilterFactory.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.Version;
+
+/**
+ * Simple tests for {@link KoreanPartOfSpeechStopFilterFactory}
+ */
+public class TestKoreanPartOfSpeechStopFilterFactory extends BaseTokenStreamTestCase {
+  public void testStopTags() throws IOException {
+    KoreanTokenizerFactory tokenizerFactory = new KoreanTokenizerFactory(new HashMap<String,String>());
+    tokenizerFactory.inform(new StringMockResourceLoader(""));
+    TokenStream ts = tokenizerFactory.create();
+    ((Tokenizer)ts).setReader(new StringReader(" 한국은 대단한 나라입니다."));
+    Map<String,String> args = new HashMap<>();
+    args.put("luceneMatchVersion", Version.LATEST.toString());
+    args.put("tags", "E, J");
+    KoreanPartOfSpeechStopFilterFactory factory = new KoreanPartOfSpeechStopFilterFactory(args);
+    ts = factory.create(ts);
+    assertTokenStreamContents(ts,
+        new String[] { "한국", "대단", "하", "나라", "이" }
+    );
+  }
+
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
+      new KoreanPartOfSpeechStopFilterFactory(new HashMap<String,String>() {{
+        put("luceneMatchVersion", Version.LATEST.toString());
+        put("bogusArg", "bogusValue");
+      }});
+    });
+    assertTrue(expected.getMessage().contains("Unknown parameters"));
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanReadingFormFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanReadingFormFilter.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanReadingFormFilter.java
new file mode 100644
index 0000000..0c5a40c
--- /dev/null
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanReadingFormFilter.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Tests for {@link KoreanReadingFormFilter}
+ */
+public class TestKoreanReadingFormFilter extends BaseTokenStreamTestCase {
+  private Analyzer analyzer;
+  
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer tokenizer =
+            new KoreanTokenizer(newAttributeFactory(), null, KoreanTokenizer.DecompoundMode.DISCARD, false);
+        return new TokenStreamComponents(tokenizer, new KoreanReadingFormFilter(tokenizer));
+      }
+    };
+  }
+  
+  @Override
+  public void tearDown() throws Exception {
+    IOUtils.close(analyzer);
+    super.tearDown();
+  }
+
+  public void testReadings() throws IOException {
+    assertAnalyzesTo(analyzer, "車丞相",
+        new String[] { "차", "승상" }
+    );
+  }
+  
+  public void testRandomData() throws IOException {
+    Random random = random();
+    checkRandomData(random, analyzer, 1000*RANDOM_MULTIPLIER);
+  }
+  
+  public void testEmptyTerm() throws IOException {
+    Analyzer a = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer tokenizer = new KeywordTokenizer();
+        return new TokenStreamComponents(tokenizer, new KoreanReadingFormFilter(tokenizer));
+      }
+    };
+    checkOneTerm(a, "", "");
+    a.close();
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanReadingFormFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanReadingFormFilterFactory.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanReadingFormFilterFactory.java
new file mode 100644
index 0000000..f058a44
--- /dev/null
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanReadingFormFilterFactory.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashMap;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+
+/**
+ * Simple tests for {@link KoreanReadingFormFilterFactory}
+ */
+public class TestKoreanReadingFormFilterFactory extends BaseTokenStreamTestCase {
+  public void testReadings() throws IOException {
+    KoreanTokenizerFactory tokenizerFactory = new KoreanTokenizerFactory(new HashMap<>());
+    tokenizerFactory.inform(new StringMockResourceLoader(""));
+    TokenStream tokenStream = tokenizerFactory.create();
+    ((Tokenizer)tokenStream).setReader(new StringReader("丞相"));
+    KoreanReadingFormFilterFactory filterFactory = new KoreanReadingFormFilterFactory(new HashMap<>());
+    assertTokenStreamContents(filterFactory.create(tokenStream),
+        new String[] { "승상" }
+    );
+  }
+  
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
+      new KoreanReadingFormFilterFactory(new HashMap<String,String>() {{
+        put("bogusArg", "bogusValue");
+      }});
+    });
+    assertTrue(expected.getMessage().contains("Unknown parameters"));
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizer.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizer.java
new file mode 100644
index 0000000..0471e5f
--- /dev/null
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizer.java
@@ -0,0 +1,355 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
+import java.util.Random;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockGraphTokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.ko.KoreanTokenizer.DecompoundMode;
+import org.apache.lucene.analysis.ko.dict.UserDictionary;
+import org.apache.lucene.analysis.ko.tokenattributes.PartOfSpeechAttribute;
+import org.apache.lucene.analysis.ko.tokenattributes.ReadingAttribute;
+
+public class TestKoreanTokenizer extends BaseTokenStreamTestCase {
+  private Analyzer analyzer, analyzerUnigram, analyzerDecompound, analyzerDecompoundKeep, analyzerReading;
+
+  public static UserDictionary readDict() {
+    InputStream is = TestKoreanTokenizer.class.getResourceAsStream("userdict.txt");
+    if (is == null) {
+      throw new RuntimeException("Cannot find userdict.txt in test classpath!");
+    }
+    try {
+      try {
+        Reader reader = new InputStreamReader(is, StandardCharsets.UTF_8);
+        return UserDictionary.open(reader);
+      } finally {
+        is.close();
+      }
+    } catch (IOException ioe) {
+      throw new RuntimeException(ioe);
+    }
+  }
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    UserDictionary userDictionary = readDict();
+    analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer tokenizer = new KoreanTokenizer(newAttributeFactory(), userDictionary,
+            DecompoundMode.NONE, false);
+        return new TokenStreamComponents(tokenizer, tokenizer);
+      }
+    };
+    analyzerUnigram = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer tokenizer = new KoreanTokenizer(newAttributeFactory(), userDictionary,
+            DecompoundMode.NONE, true);
+        return new TokenStreamComponents(tokenizer, tokenizer);
+      }
+    };
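+    // DecompoundMode.DISCARD: compound tokens are dropped and only their decompounded parts are emitted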
+    analyzerDecompound = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer tokenizer = new KoreanTokenizer(newAttributeFactory(), userDictionary,
+            DecompoundMode.DISCARD, false);
+        return new TokenStreamComponents(tokenizer);
+      }
+    };
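+    // DecompoundMode.MIXED: compound tokens are kept and their decompounded parts are emitted as well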
+    analyzerDecompoundKeep = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer tokenizer = new KoreanTokenizer(newAttributeFactory(), userDictionary,
+            DecompoundMode.MIXED, false);
+        return new TokenStreamComponents(tokenizer);
+      }
+    };
+    analyzerReading = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer tokenizer = new KoreanTokenizer(newAttributeFactory(), userDictionary,
+            DecompoundMode.NONE, false);
+        KoreanReadingFormFilter filter = new KoreanReadingFormFilter(tokenizer);
+        return new TokenStreamComponents(tokenizer, filter);
+      }
+    };
+  }
+
+  public void testSpaces() throws IOException {
+    assertAnalyzesTo(analyzer, "화학        이외의         것",
+        new String[]{"화학", "이외", "의", "것"},
+        new int[]{0, 10, 12, 22},
+        new int[]{2, 12, 13, 23},
+        new int[]{1, 1, 1, 1}
+    );
+    assertPartsOfSpeech(analyzer, "화학 이외의         것",
+        new POS.Type[] { POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME },
+        new POS.Tag[] { POS.Tag.NNG, POS.Tag.NNG, POS.Tag.J, POS.Tag.NNB },
+        new POS.Tag[] { POS.Tag.NNG, POS.Tag.NNG, POS.Tag.J, POS.Tag.NNB }
+    );
+  }
+
+  public void testPartOfSpeechs() throws IOException {
+    assertAnalyzesTo(analyzer, "화학 이외의 것",
+        new String[]{"화학", "이외", "의", "것"},
+        new int[]{0, 3, 5, 7},
+        new int[]{2, 5, 6, 8},
+        new int[]{1, 1, 1, 1}
+    );
+    assertPartsOfSpeech(analyzer, "화학 이외의 것",
+        new POS.Type[] { POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME },
+        new POS.Tag[] { POS.Tag.NNG, POS.Tag.NNG, POS.Tag.J, POS.Tag.NNB },
+        new POS.Tag[] { POS.Tag.NNG, POS.Tag.NNG, POS.Tag.J, POS.Tag.NNB }
+    );
+  }
+
+  public void testPartOfSpeechsWithCompound() throws IOException {
+    assertAnalyzesTo(analyzer, "가락지나물은 한국, 중국, 일본",
+        new String[]{"가락지나물", "은", "한국", "중국", "일본"},
+        new int[]{0, 5, 7, 11, 15},
+        new int[]{5, 6, 9, 13, 17},
+        new int[]{1, 1, 1, 1, 1}
+    );
+
+    assertPartsOfSpeech(analyzer,"가락지나물은 한국, 중국, 일본",
+        new POS.Type[]{POS.Type.COMPOUND, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME},
+        new POS.Tag[]{POS.Tag.NNG, POS.Tag.J, POS.Tag.NNP, POS.Tag.NNP, POS.Tag.NNP},
+        new POS.Tag[]{POS.Tag.NNG, POS.Tag.J, POS.Tag.NNP, POS.Tag.NNP, POS.Tag.NNP}
+    );
+
+    assertAnalyzesTo(analyzerDecompound, "가락지나물은 한국, 중국, 일본",
+        new String[]{"가락지", "나물", "은", "한국", "중국", "일본"},
+        new int[]{0, 3, 5, 7, 11, 15},
+        new int[]{3, 5, 6, 9, 13, 17},
+        new int[]{1, 1, 1, 1, 1, 1}
+    );
+
+    assertAnalyzesTo(analyzerDecompoundKeep, "가락지나물은 한국, 중국, 일본",
+        new String[]{"가락지나물", "가락지", "나물", "은", "한국", "중국", "일본"},
+        new int[]{0, 0, 3, 5, 7, 11, 15},
+        new int[]{5, 3, 5, 6, 9, 13, 17},
+        null,
+        new int[]{1, 0, 1, 1, 1, 1, 1},
+        new int[]{2, 1, 1, 1, 1, 1, 1}
+    );
+
+    assertPartsOfSpeech(analyzerDecompound,"가락지나물은 한국, 중국, 일본",
+        new POS.Type[]{POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME},
+        new POS.Tag[]{POS.Tag.NNG, POS.Tag.NNG, POS.Tag.J, POS.Tag.NNP, POS.Tag.NNP, POS.Tag.NNP},
+        new POS.Tag[]{POS.Tag.NNG, POS.Tag.NNG, POS.Tag.J, POS.Tag.NNP, POS.Tag.NNP, POS.Tag.NNP}
+    );
+
+    assertPartsOfSpeech(analyzerDecompoundKeep,"가락지나물은 한국, 중국, 일본",
+        new POS.Type[]{POS.Type.COMPOUND, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME},
+        new POS.Tag[]{POS.Tag.NNG, POS.Tag.NNG, POS.Tag.NNG, POS.Tag.J, POS.Tag.NNP, POS.Tag.NNP, POS.Tag.NNP},
+        new POS.Tag[]{POS.Tag.NNG, POS.Tag.NNG, POS.Tag.NNG, POS.Tag.J, POS.Tag.NNP, POS.Tag.NNP, POS.Tag.NNP}
+    );
+  }
+
+  public void testPartOfSpeechsWithInflects() throws IOException {
+    assertAnalyzesTo(analyzer, "감싸여",
+        new String[]{"감싸여"},
+        new int[]{0},
+        new int[]{3},
+        new int[]{1}
+    );
+
+    assertPartsOfSpeech(analyzer, "감싸여",
+        new POS.Type[]{POS.Type.INFLECT},
+        new POS.Tag[]{POS.Tag.VV},
+        new POS.Tag[]{POS.Tag.E}
+    );
+
+    assertAnalyzesTo(analyzerDecompound, "감싸여",
+        new String[]{"감싸이", "어"},
+        new int[]{0, 0},
+        new int[]{3, 3},
+        new int[]{1, 1}
+    );
+
+    assertAnalyzesTo(analyzerDecompoundKeep, "감싸여",
+        new String[]{"감싸여", "감싸이", "어"},
+        new int[]{0, 0, 0},
+        new int[]{3, 3, 3},
+        null,
+        new int[]{1, 0, 1},
+        new int[]{2, 1, 1}
+    );
+
+    assertPartsOfSpeech(analyzerDecompound, "감싸여",
+        new POS.Type[]{POS.Type.MORPHEME, POS.Type.MORPHEME},
+        new POS.Tag[]{POS.Tag.VV, POS.Tag.E},
+        new POS.Tag[]{POS.Tag.VV, POS.Tag.E}
+    );
+
+    assertPartsOfSpeech(analyzerDecompoundKeep, "감싸여",
+        new POS.Type[]{POS.Type.INFLECT, POS.Type.MORPHEME, POS.Type.MORPHEME},
+        new POS.Tag[]{POS.Tag.VV, POS.Tag.VV, POS.Tag.E},
+        new POS.Tag[]{POS.Tag.E, POS.Tag.VV, POS.Tag.E}
+    );
+  }
+
+  public void testUnknownWord() throws IOException {
+    assertAnalyzesTo(analyzer,"2018 평창 동계올림픽대회",
+        new String[]{"2018", "평창", "동계", "올림픽", "대회"},
+        new int[]{0, 5, 8, 10, 13},
+        new int[]{4, 7, 10, 13, 15},
+        new int[]{1, 1, 1, 1, 1});
+
+    assertPartsOfSpeech(analyzer,"2018 평창 동계올림픽대회",
+        new POS.Type[]{POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME},
+        new POS.Tag[]{POS.Tag.SN, POS.Tag.NNP, POS.Tag.NNP, POS.Tag.NNP, POS.Tag.NNG},
+        new POS.Tag[]{POS.Tag.SN, POS.Tag.NNP, POS.Tag.NNP, POS.Tag.NNP, POS.Tag.NNG}
+    );
+
+    assertAnalyzesTo(analyzerUnigram,"2018 평창 동계올림픽대회",
+        new String[]{"2", "0", "1", "8", "평창", "동계", "올림픽", "대회"},
+        new int[]{0, 1, 2, 3, 5, 8, 10, 13},
+        new int[]{1, 2, 3, 4, 7, 10, 13, 15},
+        new int[]{1, 1, 1, 1, 1, 1, 1, 1});
+
+    assertPartsOfSpeech(analyzerUnigram,"2018 평창 동계올림픽대회",
+        new POS.Type[]{POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME,},
+        new POS.Tag[]{POS.Tag.SY, POS.Tag.SY, POS.Tag.SY, POS.Tag.SY, POS.Tag.NNP, POS.Tag.NNP, POS.Tag.NNP, POS.Tag.NNG},
+        new POS.Tag[]{POS.Tag.SY, POS.Tag.SY, POS.Tag.SY, POS.Tag.SY, POS.Tag.NNP, POS.Tag.NNP, POS.Tag.NNP, POS.Tag.NNG}
+    );
+  }
+
+  public void testReading() throws IOException {
+    assertReadings(analyzer, "喜悲哀歡", "희비애환");
+    assertReadings(analyzer, "五朔居廬", "오삭거려");
+    assertReadings(analyzer, "가늘라", new String[] {null});
+    assertAnalyzesTo(analyzerReading,"喜悲哀歡",
+        new String[]{"희비애환"},
+        new int[]{0},
+        new int[]{4},
+        new int[]{1});
+    assertAnalyzesTo(analyzerReading,"五朔居廬",
+        new String[]{"오삭거려"},
+        new int[]{0},
+        new int[]{4},
+        new int[]{1});
+    assertAnalyzesTo(analyzerReading,"가늘라",
+        new String[]{"가늘라"},
+        new int[]{0},
+        new int[]{3},
+        new int[]{1});
+  }
+
+  public void testUserDict() throws IOException {
+    assertAnalyzesTo(analyzer, "c++ 프로그래밍 언어",
+        new String[]{"c++", "프로그래밍", "언어"},
+        new int[]{0, 4, 10},
+        new int[]{3, 9, 12},
+        new int[]{1, 1, 1}
+    );
+
+    assertPartsOfSpeech(analyzer, "c++ 프로그래밍 언어",
+        new POS.Type[]{POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME},
+        new POS.Tag[]{POS.Tag.NNG, POS.Tag.NNG, POS.Tag.NNG},
+        new POS.Tag[]{POS.Tag.NNG, POS.Tag.NNG, POS.Tag.NNG}
+    );
+
+    assertAnalyzesTo(analyzerDecompound, "정부세종청사",
+        new String[]{"정부", "세종", "청사"},
+        new int[]{0, 2, 4},
+        new int[]{2, 4, 6},
+        new int[]{1, 1, 1}
+    );
+
+    assertPartsOfSpeech(analyzerDecompound, "정부세종청사",
+        new POS.Type[]{POS.Type.MORPHEME, POS.Type.MORPHEME, POS.Type.MORPHEME},
+        new POS.Tag[]{POS.Tag.NNG, POS.Tag.NNG, POS.Tag.NNG},
+        new POS.Tag[]{POS.Tag.NNG, POS.Tag.NNG, POS.Tag.NNG}
+    );
+  }
+
+
+  /** blast some random strings through the tokenizer */
+  public void testRandomStrings() throws Exception {
+    checkRandomData(random(), analyzer, 500*RANDOM_MULTIPLIER);
+    checkRandomData(random(), analyzerUnigram, 500*RANDOM_MULTIPLIER);
+    checkRandomData(random(), analyzerDecompound, 500*RANDOM_MULTIPLIER);
+  }
+
+  /** blast some random large strings through the tokenizer */
+  public void testRandomHugeStrings() throws Exception {
+    Random random = random();
+    checkRandomData(random, analyzer, 20*RANDOM_MULTIPLIER, 8192);
+    checkRandomData(random, analyzerUnigram, 20*RANDOM_MULTIPLIER, 8192);
+    checkRandomData(random, analyzerDecompound, 20*RANDOM_MULTIPLIER, 8192);
+  }
+
+  public void testRandomHugeStringsMockGraphAfter() throws Exception {
+    // Randomly inject graph tokens after KoreanTokenizer:
+    Random random = random();
+    Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer tokenizer = new KoreanTokenizer(newAttributeFactory(), null, DecompoundMode.MIXED, false);
+        TokenStream graph = new MockGraphTokenFilter(random(), tokenizer);
+        return new TokenStreamComponents(tokenizer, graph);
+      }
+    };
+    checkRandomData(random, analyzer, 20*RANDOM_MULTIPLIER, 8192);
+    analyzer.close();
+  }
+
+  private void assertReadings(Analyzer analyzer, String input, String... readings) throws IOException {
+    try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
+      ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class);
+      ts.reset();
+      for(String reading : readings) {
+        assertTrue(ts.incrementToken());
+        assertEquals(reading, readingAtt.getReading());
+      }
+      assertFalse(ts.incrementToken());
+      ts.end();
+    }
+  }
+
+  private void assertPartsOfSpeech(Analyzer analyzer, String input, POS.Type[] posTypes, POS.Tag[] leftPosTags, POS.Tag[] rightPosTags) throws IOException {
+    assert posTypes.length == leftPosTags.length && posTypes.length == rightPosTags.length;
+    try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
+      PartOfSpeechAttribute partOfSpeechAtt = ts.addAttribute(PartOfSpeechAttribute.class);
+      ts.reset();
+      for (int i = 0; i < posTypes.length; i++) {
+        POS.Type posType = posTypes[i];
+        POS.Tag leftTag = leftPosTags[i];
+        POS.Tag rightTag = rightPosTags[i];
+        assertTrue(ts.incrementToken());
+        assertEquals(posType, partOfSpeechAtt.getPOSType());
+        assertEquals(leftTag, partOfSpeechAtt.getLeftPOS());
+        assertEquals(rightTag, partOfSpeechAtt.getRightPOS());
+      }
+      assertFalse(ts.incrementToken());
+      ts.end();
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizerFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizerFactory.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizerFactory.java
new file mode 100644
index 0000000..755af64
--- /dev/null
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/TestKoreanTokenizerFactory.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+
+/**
+ * Simple tests for {@link KoreanTokenizerFactory}
+ */
+public class TestKoreanTokenizerFactory extends BaseTokenStreamTestCase {
+  public void testSimple() throws IOException {
+    KoreanTokenizerFactory factory = new KoreanTokenizerFactory(Collections.emptyMap());
+    factory.inform(new StringMockResourceLoader(""));
+    TokenStream ts = factory.create(newAttributeFactory());
+    ((Tokenizer)ts).setReader(new StringReader("안녕하세요"));
+    assertTokenStreamContents(ts,
+        new String[] { "안녕", "하", "시", "어요" },
+        new int[] { 0, 2, 3, 3 },
+        new int[] { 2, 3, 5, 5 }
+    );
+  }
+
+  /**
+   * Test decompoundMode
+   */
+  public void testDiscardDecompound() throws IOException {
+    Map<String,String> args = new HashMap<>();
+    args.put("decompoundMode", "discard");
+    KoreanTokenizerFactory factory = new KoreanTokenizerFactory(args);
+    factory.inform(new StringMockResourceLoader(""));
+    TokenStream ts = factory.create(newAttributeFactory());
+    ((Tokenizer)ts).setReader(new StringReader("갠지스강"));
+    assertTokenStreamContents(ts,
+        new String[] { "갠지스", "강" }
+    );
+  }
+
+  public void testNoDecompound() throws IOException {
+    Map<String,String> args = new HashMap<>();
+    args.put("decompoundMode", "none");
+    KoreanTokenizerFactory factory = new KoreanTokenizerFactory(args);
+    factory.inform(new StringMockResourceLoader(""));
+    TokenStream ts = factory.create(newAttributeFactory());
+    ((Tokenizer)ts).setReader(new StringReader("갠지스강"));
+    assertTokenStreamContents(ts,
+        new String[] { "갠지스강" }
+    );
+  }
+
+  public void testMixedDecompound() throws IOException {
+    Map<String,String> args = new HashMap<>();
+    args.put("decompoundMode", "mixed");
+    KoreanTokenizerFactory factory = new KoreanTokenizerFactory(args);
+    factory.inform(new StringMockResourceLoader(""));
+    TokenStream ts = factory.create(newAttributeFactory());
+    ((Tokenizer)ts).setReader(new StringReader("갠지스강"));
+    assertTokenStreamContents(ts,
+        new String[] { "갠지스강", "갠지스", "강" }
+    );
+  }
+
+  /**
+   * Test user dictionary
+   */
+  public void testUserDict() throws IOException {
+    String userDict =
+        "# Additional nouns\n" +
+        "세종시 세종 시\n" +
+        "# \n" +
+        "c++\n";
+    Map<String,String> args = new HashMap<>();
+    args.put("userDictionary", "userdict.txt");
+    KoreanTokenizerFactory factory = new KoreanTokenizerFactory(args);
+    factory.inform(new StringMockResourceLoader(userDict));
+    TokenStream ts = factory.create(newAttributeFactory());
+    ((Tokenizer)ts).setReader(new StringReader("세종시"));
+    assertTokenStreamContents(ts,
+        new String[] { "세종", "시" }
+    );
+  }
+
+  /** Test that bogus arguments result in exception */
+  public void testBogusArguments() throws Exception {
+    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
+      new KoreanTokenizerFactory(new HashMap<String,String>() {{
+        put("bogusArg", "bogusValue");
+      }});
+    });
+    assertTrue(expected.getMessage().contains("Unknown parameters"));
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestTokenInfoDictionary.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestTokenInfoDictionary.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestTokenInfoDictionary.java
new file mode 100644
index 0000000..d278841
--- /dev/null
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestTokenInfoDictionary.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.dict;
+
+import org.apache.lucene.analysis.ko.POS;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.IntsRefFSTEnum;
+import org.apache.lucene.util.fst.IntsRefFSTEnum.InputOutput;
+
+public class TestTokenInfoDictionary extends LuceneTestCase {
+
+  /** enumerates the entire FST/lookup data and just does basic sanity checks */
+  public void testEnumerateAll() throws Exception {
+    // just for debugging
+    int numTerms = 0;
+    int numWords = 0;
+    int lastWordId = -1;
+    int lastSourceId = -1;
+    CharacterDefinition charDef = CharacterDefinition.getInstance();
+    TokenInfoDictionary tid = TokenInfoDictionary.getInstance();
+    ConnectionCosts matrix = ConnectionCosts.getInstance();
+    FST<Long> fst = tid.getFST().getInternalFST();
+    IntsRefFSTEnum<Long> fstEnum = new IntsRefFSTEnum<>(fst);
+    InputOutput<Long> mapping;
+    IntsRef scratch = new IntsRef();
+    while ((mapping = fstEnum.next()) != null) {
+      numTerms++;
+      IntsRef input = mapping.input;
+      char chars[] = new char[input.length];
+      for (int i = 0; i < chars.length; i++) {
+        chars[i] = (char)input.ints[input.offset+i];
+      }
+      String surfaceForm = new String(chars);
+      assertTrue(UnicodeUtil.validUTF16String(surfaceForm));
+      
+      Long output = mapping.output;
+      int sourceId = output.intValue();
+      // we walk in order, so terms, sourceIds, and wordIds should always be increasing
+      assertTrue(sourceId > lastSourceId);
+      lastSourceId = sourceId;
+      tid.lookupWordIds(sourceId, scratch);
+      for (int i = 0; i < scratch.length; i++) {
+        numWords++;
+        int wordId = scratch.ints[scratch.offset+i];
+        assertTrue(wordId > lastWordId);
+        lastWordId = wordId;
+
+        int leftId = tid.getLeftId(wordId);
+        int rightId = tid.getRightId(wordId);
+
+        matrix.get(rightId, leftId);
+
+        tid.getWordCost(wordId);
+
+        POS.Type type = tid.getPOSType(wordId);
+        POS.Tag leftPOS = tid.getLeftPOS(wordId);
+        POS.Tag rightPOS = tid.getRightPOS(wordId);
+
+        if (type == POS.Type.MORPHEME) {
+          assertTrue(leftPOS == rightPOS);
+          String reading = tid.getReading(wordId);
+          boolean isHanja = charDef.isHanja(surfaceForm.charAt(0));
+          if (isHanja) {
+            assertTrue(reading != null);
+            for (int j = 0; j < reading.length(); j++) {
+              assertTrue(charDef.isHangul(reading.charAt(j)));
+            }
+          }
+          if (reading != null) {
+            assertTrue(UnicodeUtil.validUTF16String(reading));
+          }
+        } else {
+          if (type == POS.Type.COMPOUND) {
+            assertTrue(leftPOS == rightPOS);
+            assertTrue(leftPOS == POS.Tag.NNG || rightPOS == POS.Tag.NNP);
+          }
+          Dictionary.Morpheme[] decompound = tid.getMorphemes(wordId,  chars, 0, chars.length);
+          if (decompound != null) {
+            int offset = 0;
+            for (Dictionary.Morpheme morph : decompound) {
+              assertTrue(UnicodeUtil.validUTF16String(morph.surfaceForm));
+              if (type != POS.Type.INFLECT) {
+                assertEquals(morph.surfaceForm, surfaceForm.substring(offset, offset + morph.surfaceForm.length()));
+                offset += morph.surfaceForm.length();
+              }
+            }
+            assertTrue(offset <= surfaceForm.length());
+          }
+        }
+      }
+    }
+    if (VERBOSE) {
+      System.out.println("checked " + numTerms + " terms, " + numWords + " words.");
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/UserDictionaryTest.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/UserDictionaryTest.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/UserDictionaryTest.java
new file mode 100644
index 0000000..b008cf3
--- /dev/null
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/UserDictionaryTest.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.dict;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.analysis.ko.POS;
+import org.apache.lucene.analysis.ko.TestKoreanTokenizer;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.Test;
+
+public class UserDictionaryTest extends LuceneTestCase {
+  @Test
+  public void testLookup() throws IOException {
+    UserDictionary dictionary = TestKoreanTokenizer.readDict();
+    String s = "세종";
+    char[] sArray = s.toCharArray();
+    List<Integer> wordIds = dictionary.lookup(sArray, 0, s.length());
+    assertEquals(1, wordIds.size());
+    assertNull(dictionary.getMorphemes(wordIds.get(0), sArray, 0, s.length()));
+
+    s = "세종시";
+    sArray = s.toCharArray();
+    wordIds = dictionary.lookup(sArray, 0, s.length());
+    assertEquals(2, wordIds.size());
+    assertNull(dictionary.getMorphemes(wordIds.get(0), sArray, 0, s.length()));
+
+    Dictionary.Morpheme[] decompound = dictionary.getMorphemes(wordIds.get(1), sArray, 0, s.length());
+    assertTrue(decompound.length == 2);
+    assertEquals(decompound[0].posTag, POS.Tag.NNG);
+    assertEquals(decompound[0].surfaceForm, "세종");
+    assertEquals(decompound[1].posTag, POS.Tag.NNG);
+    assertEquals(decompound[1].surfaceForm, "시");
+
+    s = "c++";
+    sArray = s.toCharArray();
+    wordIds = dictionary.lookup(sArray, 0, s.length());
+    assertEquals(1, wordIds.size());
+    assertNull(dictionary.getMorphemes(wordIds.get(0), sArray, 0, s.length()));
+  }
+  
+  @Test
+  public void testRead() throws IOException {
+    UserDictionary dictionary = TestKoreanTokenizer.readDict();
+    assertNotNull(dictionary);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/userdict.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/userdict.txt b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/userdict.txt
new file mode 100644
index 0000000..63c1c3a
--- /dev/null
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/userdict.txt
@@ -0,0 +1,5 @@
+# Additional nouns
+c++
+C샤프
+세종
+세종시 세종 시
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/BinaryDictionaryWriter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/BinaryDictionaryWriter.java b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/BinaryDictionaryWriter.java
new file mode 100644
index 0000000..35c16ae
--- /dev/null
+++ b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/BinaryDictionaryWriter.java
@@ -0,0 +1,282 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.util;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.channels.Channels;
+import java.nio.channels.WritableByteChannel;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.ko.POS;
+import org.apache.lucene.analysis.ko.dict.Dictionary;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.OutputStreamDataOutput;
+import org.apache.lucene.util.ArrayUtil;
+
+import org.apache.lucene.analysis.ko.dict.BinaryDictionary;
+
+public abstract class BinaryDictionaryWriter {
+  protected final Class<? extends BinaryDictionary> implClazz;
+  protected ByteBuffer buffer;
+  private int targetMapEndOffset = 0, lastWordId = -1, lastSourceId = -1;
+  private int[] targetMap = new int[8192];
+  private int[] targetMapOffsets = new int[8192];
+  private final ArrayList<String> posDict = new ArrayList<>();
+
+  public BinaryDictionaryWriter(Class<? extends BinaryDictionary> implClazz, int size) {
+    this.implClazz = implClazz;
+    buffer = ByteBuffer.allocate(size);
+  }
+  
+  /**
+   * put the entry in map
+   *
+   * mecab-ko-dic features
+   *
+   * 0   - surface
+   * 1   - left cost
+   * 2   - right cost
+   * 3   - word cost
+   * 4   - part of speech0+part of speech1+...
+   * 5   - semantic class
+   * 6   - T if the last character of the surface form has a coda, F otherwise
+   * 7   - reading
+   * 8   - POS type (*, Compound, Inflect, Preanalysis)
+   * 9   - left POS
+   * 10  - right POS
+   * 11  - expression
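+   *
+   * A hypothetical entry (field values are illustrative only, not taken from the real
+   * mecab-ko-dic data) would look like:
+   *
+   *   나물,1780,3534,2000,NNG,*,T,나물,*,*,*,*
+   *
+   * i.e. a plain NNG morpheme whose reading equals its surface form and which has
+   * no expression.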
+   *
+   * @return current position of buffer, which will be wordId of next entry
+   */
+  public int put(String[] entry) {
+    short leftId = Short.parseShort(entry[1]);
+    short rightId = Short.parseShort(entry[2]);
+    short wordCost = Short.parseShort(entry[3]);
+
+    final POS.Type posType = POS.resolveType(entry[8]);
+    final POS.Tag leftPOS;
+    final POS.Tag rightPOS;
+    if (posType == POS.Type.MORPHEME || posType == POS.Type.COMPOUND || entry[9].equals("*")) {
+      leftPOS = POS.resolveTag(entry[4]);
+      assert (entry[9].equals("*") && entry[10].equals("*"));
+      rightPOS = leftPOS;
+    } else {
+      leftPOS = POS.resolveTag(entry[9]);
+      rightPOS = POS.resolveTag(entry[10]);
+    }
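+    // keep a reading only when one is present and it differs from the surface form
+    // (typically the Hangul reading of a Hanja surface form)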
+    final String reading = entry[7].equals("*") ? "" : entry[0].equals(entry[7]) ? "" : entry[7];
+    final String expression = entry[11].equals("*") ? "" : entry[11];
+
+    // extend buffer if necessary
+    int left = buffer.remaining();
+    // approximate worst case: 3 shorts plus a few flag/length bytes, plus the reading and expression strings (as utf-16)
+    int worstCase = 9 + 2*(expression.length() + reading.length());
+    if (worstCase > left) {
+      ByteBuffer newBuffer = ByteBuffer.allocate(ArrayUtil.oversize(buffer.limit() + worstCase - left, 1));
+      buffer.flip();
+      newBuffer.put(buffer);
+      buffer = newBuffer;
+    }
+
+    // add pos mapping
+    int toFill = 1+leftId - posDict.size();
+    for (int i = 0; i < toFill; i++) {
+      posDict.add(null);
+    }
+    String fullPOSData = leftPOS.name() + "," + entry[5];
+    String existing = posDict.get(leftId);
+    assert existing == null || existing.equals(fullPOSData);
+    posDict.set(leftId, fullPOSData);
+
+    final Dictionary.Morpheme[] morphemes;
+    // true if the POS and decompounds of the token are all the same.
+    boolean hasSinglePOS = (leftPOS == rightPOS);
+    if (posType != POS.Type.MORPHEME && expression.length() > 0) {
+      String[] exprTokens = expression.split("\\+");
+      morphemes = new Dictionary.Morpheme[exprTokens.length];
+      for (int i = 0; i < exprTokens.length; i++) {
+        String[] tokenSplit = exprTokens[i].split("\\/");
+        assert tokenSplit.length == 3;
+        POS.Tag exprTag = POS.resolveTag(tokenSplit[1]);
+        morphemes[i] = new Dictionary.Morpheme(exprTag, tokenSplit[0]);
+        if (leftPOS != exprTag) {
+          hasSinglePOS = false;
+        }
+      }
+    } else {
+      morphemes = new Dictionary.Morpheme[0];
+    }
+
+    int flags = 0;
+    if (hasSinglePOS) {
+      flags |= BinaryDictionary.HAS_SINGLE_POS;
+    }
+    if (posType == POS.Type.MORPHEME && reading.length() > 0) {
+      flags |= BinaryDictionary.HAS_READING;
+    }
+
+    assert leftId < 8192; // there are still unused bits
+    assert posType.ordinal() < 4;
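+    // pack the POS type into the low 2 bits of the leftId short, and the
+    // HAS_SINGLE_POS/HAS_READING flags into the low 2 bits of the rightId short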
+    buffer.putShort((short)(leftId << 2 | posType.ordinal()));
+    buffer.putShort((short) (rightId << 2 | flags));
+    buffer.putShort(wordCost);
+
+    if (posType == POS.Type.MORPHEME) {
+      assert leftPOS == rightPOS;
+      if (reading.length() > 0) {
+        writeString(reading);
+      }
+    } else {
+      if (hasSinglePOS == false) {
+        buffer.put((byte) rightPOS.ordinal());
+      }
+      buffer.put((byte) morphemes.length);
+      int compoundOffset = 0;
+      for (int i = 0; i < morphemes.length; i++) {
+        if (hasSinglePOS == false) {
+          buffer.put((byte) morphemes[i].posTag.ordinal());
+        }
+        if (posType != POS.Type.INFLECT) {
+          buffer.put((byte) morphemes[i].surfaceForm.length());
+          compoundOffset += morphemes[i].surfaceForm.length();
+        } else {
+          writeString(morphemes[i].surfaceForm);
+        }
+        assert compoundOffset <= entry[0].length() : Arrays.toString(entry);
+      }
+    }
+    return buffer.position();
+  }
+
+  private void writeString(String s) {
+    buffer.put((byte) s.length());
+    for (int i = 0; i < s.length(); i++) {
+      buffer.putChar(s.charAt(i));
+    }
+  }
+
+  public void addMapping(int sourceId, int wordId) {
+    assert wordId > lastWordId : "words out of order: " + wordId + " vs lastID: " + lastWordId;
+
+    if (sourceId > lastSourceId) {
+      assert sourceId > lastSourceId : "source ids out of order: lastSourceId=" + lastSourceId + " vs sourceId=" + sourceId;
+      targetMapOffsets = ArrayUtil.grow(targetMapOffsets, sourceId + 1);
+      for (int i = lastSourceId + 1; i <= sourceId; i++) {
+        targetMapOffsets[i] = targetMapEndOffset;
+      }
+    } else {
+      assert sourceId == lastSourceId;
+    }
+
+    targetMap = ArrayUtil.grow(targetMap, targetMapEndOffset + 1);
+    targetMap[targetMapEndOffset] = wordId;
+    targetMapEndOffset++;
+
+    lastSourceId = sourceId;
+    lastWordId = wordId;
+  }
+  
+  protected final String getBaseFileName(String baseDir) {
+    return baseDir + File.separator + implClazz.getName().replace('.', File.separatorChar);
+  }
+
+  /**
+   * Write dictionary in file
+   * @throws IOException if an I/O error occurs writing the dictionary files
+   */
+  public void write(String baseDir) throws IOException {
+    final String baseName = getBaseFileName(baseDir);
+    writeDictionary(baseName + BinaryDictionary.DICT_FILENAME_SUFFIX);
+    writeTargetMap(baseName + BinaryDictionary.TARGETMAP_FILENAME_SUFFIX);
+    writePosDict(baseName + BinaryDictionary.POSDICT_FILENAME_SUFFIX);
+  }
+
+  protected void writeTargetMap(String filename) throws IOException {
+    new File(filename).getParentFile().mkdirs();
+    OutputStream os = new FileOutputStream(filename);
+    try {
+      os = new BufferedOutputStream(os);
+      final DataOutput out = new OutputStreamDataOutput(os);
+      CodecUtil.writeHeader(out, BinaryDictionary.TARGETMAP_HEADER, BinaryDictionary.VERSION);
+
+      final int numSourceIds = lastSourceId + 1;
+      out.writeVInt(targetMapEndOffset); // <-- size of main array
+      out.writeVInt(numSourceIds + 1); // <-- size of offset array (+ 1 more entry)
+      int prev = 0, sourceId = 0;
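+      // word ids are delta-encoded against the previous value; the low bit of each
+      // vInt marks the first entry written for a new source id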
+      for (int ofs = 0; ofs < targetMapEndOffset; ofs++) {
+        final int val = targetMap[ofs], delta = val - prev;
+        assert delta >= 0;
+        if (ofs == targetMapOffsets[sourceId]) {
+          out.writeVInt((delta << 1) | 0x01);
+          sourceId++;
+        } else {
+          out.writeVInt((delta << 1));
+        }
+        prev += delta;
+      }
+      assert sourceId == numSourceIds : "sourceId:"+sourceId+" != numSourceIds:"+numSourceIds;
+    } finally {
+      os.close();
+    }
+  }
+
+  protected void writePosDict(String filename) throws IOException {
+    new File(filename).getParentFile().mkdirs();
+    OutputStream os = new FileOutputStream(filename);
+    try {
+      os = new BufferedOutputStream(os);
+      final DataOutput out = new OutputStreamDataOutput(os);
+      CodecUtil.writeHeader(out, BinaryDictionary.POSDICT_HEADER, BinaryDictionary.VERSION);
+      out.writeVInt(posDict.size());
+      for (String s : posDict) {
+        if (s == null) {
+          out.writeByte((byte) POS.Tag.UNKNOWN.ordinal());
+        } else {
+          String data[] = CSVUtil.parse(s);
+          assert data.length == 2 : "malformed pos/semanticClass: " + s;
+          out.writeByte((byte) POS.Tag.valueOf(data[0]).ordinal());
+        }
+      }
+    } finally {
+      os.close();
+    }
+  }
+  
+  protected void writeDictionary(String filename) throws IOException {
+    new File(filename).getParentFile().mkdirs();
+    final FileOutputStream os = new FileOutputStream(filename);
+    try {
+      final DataOutput out = new OutputStreamDataOutput(os);
+      CodecUtil.writeHeader(out, BinaryDictionary.DICT_HEADER, BinaryDictionary.VERSION);
+      out.writeVInt(buffer.position());
+      final WritableByteChannel channel = Channels.newChannel(os);
+      // Write Buffer
+      buffer.flip();  // set position to 0, set limit to current position
+      channel.write(buffer);
+      assert buffer.remaining() == 0L;
+    } finally {
+      os.close();
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/CharacterDefinitionWriter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/CharacterDefinitionWriter.java b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/CharacterDefinitionWriter.java
new file mode 100644
index 0000000..5a78549
--- /dev/null
+++ b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/CharacterDefinitionWriter.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.util;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.ko.dict.CharacterDefinition;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.OutputStreamDataOutput;
+
+public final class CharacterDefinitionWriter {
+
+  private final byte[] characterCategoryMap = new byte[0x10000];
+  
+  private final boolean[] invokeMap = new boolean[CharacterDefinition.CLASS_COUNT];
+  private final boolean[] groupMap = new boolean[CharacterDefinition.CLASS_COUNT];
+    
+  /**
+   * Constructor for building. TODO: remove write access
+   */
+  public CharacterDefinitionWriter() {
+    Arrays.fill(characterCategoryMap, CharacterDefinition.DEFAULT);
+  }
+  
+  /**
+   * Put mapping from unicode code point to character class.
+   * 
+   * @param codePoint
+   *            code point
+   * @param characterClassName character class name
+   */
+  public void putCharacterCategory(int codePoint, String characterClassName) {
+    characterClassName = characterClassName.split(" ")[0]; // use only the first category class
+    
+    // Override Nakaguro
+    if (codePoint == 0x30FB) {
+      characterClassName = "SYMBOL";
+    }
+    characterCategoryMap[codePoint] = CharacterDefinition.lookupCharacterClass(characterClassName);
+  }
+  
+  public void putInvokeDefinition(String characterClassName, int invoke, int group, int length) {
+    final byte characterClass = CharacterDefinition.lookupCharacterClass(characterClassName);
+    invokeMap[characterClass] = invoke == 1;
+    groupMap[characterClass] = group == 1;
+    // TODO: length def ignored
+  }
+  
+  public void write(String baseDir) throws IOException {
+    String filename = baseDir + File.separator +
+      CharacterDefinition.class.getName().replace('.', File.separatorChar) + CharacterDefinition.FILENAME_SUFFIX;
+    new File(filename).getParentFile().mkdirs();
+    OutputStream os = new FileOutputStream(filename);
+    try {
+      os = new BufferedOutputStream(os);
+      final DataOutput out = new OutputStreamDataOutput(os);
+      CodecUtil.writeHeader(out, CharacterDefinition.HEADER, CharacterDefinition.VERSION);
+      out.writeBytes(characterCategoryMap, 0, characterCategoryMap.length);
+      for (int i = 0; i < CharacterDefinition.CLASS_COUNT; i++) {
+        final byte b = (byte) (
+          (invokeMap[i] ? 0x01 : 0x00) | 
+          (groupMap[i] ? 0x02 : 0x00)
+        );
+        out.writeByte(b);
+      }
+    } finally {
+      os.close();
+    }
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/ConnectionCostsBuilder.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/ConnectionCostsBuilder.java b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/ConnectionCostsBuilder.java
new file mode 100644
index 0000000..29659de
--- /dev/null
+++ b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/ConnectionCostsBuilder.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.util;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.LineNumberReader;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
+
+public class ConnectionCostsBuilder {
+  
+  private ConnectionCostsBuilder() {
+  }
+  
+  public static ConnectionCostsWriter build(String filename) throws IOException {
+    FileInputStream inputStream = new FileInputStream(filename);
+    Charset cs = StandardCharsets.US_ASCII;
+    CharsetDecoder decoder = cs.newDecoder()
+        .onMalformedInput(CodingErrorAction.REPORT)
+        .onUnmappableCharacter(CodingErrorAction.REPORT);
+    InputStreamReader streamReader = new InputStreamReader(inputStream, decoder);
+    LineNumberReader lineReader = new LineNumberReader(streamReader);
+    
+    String line = lineReader.readLine();
+    String[] dimensions = line.split("\\s+");
+    
+    assert dimensions.length == 2;
+    
+    int forwardSize = Integer.parseInt(dimensions[0]);
+    int backwardSize = Integer.parseInt(dimensions[1]);
+    
+    assert forwardSize > 0 && backwardSize > 0;
+    
+    ConnectionCostsWriter costs = new ConnectionCostsWriter(forwardSize, backwardSize);
+    
+    while ((line = lineReader.readLine()) != null) {
+      String[] fields = line.split("\\s+");
+      
+      assert fields.length == 3;
+      
+      int forwardId = Integer.parseInt(fields[0]);
+      int backwardId = Integer.parseInt(fields[1]);
+      int cost = Integer.parseInt(fields[2]);
+      
+      costs.add(forwardId, backwardId, cost);
+    }
+    return costs;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/ConnectionCostsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/ConnectionCostsWriter.java b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/ConnectionCostsWriter.java
new file mode 100644
index 0000000..f16f827
--- /dev/null
+++ b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/ConnectionCostsWriter.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.util;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.lucene.analysis.ko.dict.ConnectionCosts;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.OutputStreamDataOutput;
+
+public final class ConnectionCostsWriter {
+  
+  private final short[][] costs; // array is indexed by backward ID first, since get() is called with the same backward ID consecutively; maybe it doesn't matter.
+  private final int forwardSize;
+  private final int backwardSize;
+  /**
+   * Constructor for building. TODO: remove write access
+   */
+  public ConnectionCostsWriter(int forwardSize, int backwardSize) {
+    this.forwardSize = forwardSize;
+    this.backwardSize = backwardSize;
+    this.costs = new short[backwardSize][forwardSize];
+  }
+  
+  public void add(int forwardId, int backwardId, int cost) {
+    this.costs[backwardId][forwardId] = (short)cost;
+  }
+  
+  public void write(String baseDir) throws IOException {
+    String filename = baseDir + File.separator +
+      ConnectionCosts.class.getName().replace('.', File.separatorChar) + ConnectionCosts.FILENAME_SUFFIX;
+    new File(filename).getParentFile().mkdirs();
+    OutputStream os = new FileOutputStream(filename);
+    try {
+      os = new BufferedOutputStream(os);
+      final DataOutput out = new OutputStreamDataOutput(os);
+      CodecUtil.writeHeader(out, ConnectionCosts.HEADER, ConnectionCosts.VERSION);
+      out.writeVInt(forwardSize);
+      out.writeVInt(backwardSize);
+      int last = 0;
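+      // the cost matrix is streamed in backward-id-major order as zig-zag encoded
+      // deltas against the previously written cost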
+      assert costs.length == backwardSize;
+      for (short[] a : costs) {
+        assert a.length == forwardSize;
+        for (int i = 0; i < a.length; i++) {
+          int delta = (int)a[i] - last;
+          out.writeZInt(delta);
+          last = a[i];
+        }
+      }
+    } finally {
+      os.close();
+    }
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/DictionaryBuilder.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/DictionaryBuilder.java b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/DictionaryBuilder.java
new file mode 100644
index 0000000..e0039a2
--- /dev/null
+++ b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/DictionaryBuilder.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.util;
+
+import java.io.File;
+import java.io.IOException;
+
+public class DictionaryBuilder {
+
+  private DictionaryBuilder() {
+  }
+  
+  public static void build(String inputDirname, String outputDirname, String encoding, boolean normalizeEntry) throws IOException {
+    System.out.println("building tokeninfo dict...");
+    TokenInfoDictionaryBuilder tokenInfoBuilder = new TokenInfoDictionaryBuilder(encoding, normalizeEntry);
+    TokenInfoDictionaryWriter tokenInfoDictionary = tokenInfoBuilder.build(inputDirname);
+    tokenInfoDictionary.write(outputDirname);
+    tokenInfoDictionary = null;
+    tokenInfoBuilder = null;
+    System.out.println("done");
+    
+    System.out.print("building unknown word dict...");
+    UnknownDictionaryBuilder unkBuilder = new UnknownDictionaryBuilder(encoding);
+    UnknownDictionaryWriter unkDictionary = unkBuilder.build(inputDirname);
+    unkDictionary.write(outputDirname);
+    unkDictionary = null;
+    unkBuilder = null;
+    System.out.println("done");
+    
+    System.out.print("building connection costs...");
+    ConnectionCostsWriter connectionCosts
+      = ConnectionCostsBuilder.build(inputDirname + File.separator + "matrix.def");
+    connectionCosts.write(outputDirname);
+    System.out.println("done");
+  }
+  
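+  /**
+   * Command line entry point.  Expects four arguments: the mecab-ko-dic source directory,
+   * the output directory, the input encoding, and whether to NFKC-normalize entries,
+   * e.g. (paths illustrative only): {@code /path/to/mecab-ko-dic /path/to/output utf-8 true}
+   */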
+  public static void main(String[] args) throws IOException {
+    String inputDirname = args[0];
+    String outputDirname = args[1];
+    String inputEncoding = args[2];
+    boolean normalizeEntries = Boolean.parseBoolean(args[3]);
+    
+    System.out.println("dictionary builder");
+    System.out.println("");
+    System.out.println("input directory: " + inputDirname);
+    System.out.println("output directory: " + outputDirname);
+    System.out.println("input encoding: " + inputEncoding);
+    System.out.println("normalize entries: " + normalizeEntries);
+    System.out.println("");
+    DictionaryBuilder.build(inputDirname, outputDirname, inputEncoding, normalizeEntries);
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryBuilder.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryBuilder.java b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryBuilder.java
new file mode 100644
index 0000000..de60daa
--- /dev/null
+++ b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryBuilder.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.util;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.lucene.util.IntsRefBuilder;
+import org.apache.lucene.util.fst.Builder;
+import org.apache.lucene.util.fst.FST;
+
+import com.ibm.icu.text.Normalizer2;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+
+public class TokenInfoDictionaryBuilder {
+  
+  /** Internal word id - incrementally assigned as entries are read and added. This will be the byte offset of the entry in the dictionary file. */
+  private int offset = 0;
+  
+  private String encoding = "utf-8";
+  
+  private boolean normalizeEntries = false;
+  private Normalizer2 normalizer;
+
+  public TokenInfoDictionaryBuilder(String encoding, boolean normalizeEntries) {
+    this.encoding = encoding;
+    this.normalizeEntries = normalizeEntries;
+    this.normalizer = normalizeEntries ? Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE) : null;
+  }
+  
+  public TokenInfoDictionaryWriter build(String dirname) throws IOException {
+    FilenameFilter filter = (dir, name) -> name.endsWith(".csv");
+    ArrayList<File> csvFiles = new ArrayList<>();
+    for (File file : new File(dirname).listFiles(filter)) {
+      csvFiles.add(file);
+    }
+    Collections.sort(csvFiles);
+    return buildDictionary(csvFiles);
+  }
+
+  public TokenInfoDictionaryWriter buildDictionary(List<File> csvFiles) throws IOException {
+    TokenInfoDictionaryWriter dictionary = new TokenInfoDictionaryWriter(10 * 1024 * 1024);
+    
+    // all lines in the file
+    System.out.println("  parse...");
+    List<String[]> lines = new ArrayList<>(400000);
+    for (File file : csvFiles){
+      FileInputStream inputStream = new FileInputStream(file);
+      Charset cs = Charset.forName(encoding);
+      CharsetDecoder decoder = cs.newDecoder()
+          .onMalformedInput(CodingErrorAction.REPORT)
+          .onUnmappableCharacter(CodingErrorAction.REPORT);
+      InputStreamReader streamReader = new InputStreamReader(inputStream, decoder);
+      BufferedReader reader = new BufferedReader(streamReader);
+      
+      String line = null;
+      while ((line = reader.readLine()) != null) {
+        String[] entry = CSVUtil.parse(line);
+
+        if(entry.length < 12) {
+          System.out.println("Entry in CSV is not valid: " + line);
+          continue;
+        }
+
+        // NFKC normalize dictionary entry
+        if (normalizeEntries) {
+          String[] normalizedEntry = new String[entry.length];
+          for (int i = 0; i < entry.length; i++) {
+            normalizedEntry[i] = normalizer.normalize(entry[i]);
+          }
+          lines.add(normalizedEntry);
+        } else {
+          lines.add(entry);
+        }
+      }
+    }
+    
+    System.out.println("  sort...");
+
+    // sort by term: we sorted the files already and use a stable sort.
+    Collections.sort(lines, Comparator.comparing(left -> left[0]));
+    
+    System.out.println("  encode...");
+
+    PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
+    Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, true, 15);
+    IntsRefBuilder scratch = new IntsRefBuilder();
+    long ord = -1; // first ord will be 0
+    String lastValue = null;
+
+    // build tokeninfo dictionary
+    for (String[] entry : lines) {
+      int next = dictionary.put(entry);
+
+      if(next == offset){
+        System.out.println("Failed to process line: " + Arrays.toString(entry));
+        continue;
+      }
+
+      String token = entry[0];
+      if (!token.equals(lastValue)) {
+        // new word to add to fst
+        ord++;
+        lastValue = token;
+        scratch.grow(token.length());
+        scratch.setLength(token.length());
+        for (int i = 0; i < token.length(); i++) {
+          scratch.setIntAt(i, (int) token.charAt(i));
+        }
+        fstBuilder.add(scratch.get(), ord);
+      }
+      dictionary.addMapping((int)ord, offset);
+      offset = next;
+    }
+
+    final FST<Long> fst = fstBuilder.finish();
+    
+    System.out.print("  " + fstBuilder.getNodeCount() + " nodes, " + fstBuilder.getArcCount() + " arcs, " + fst.ramBytesUsed() + " bytes...  ");
+    dictionary.setFST(fst);
+    System.out.println(" done");
+    
+    return dictionary;
+  }
+}
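
A hedged usage sketch of the builder above on its own (paths are placeholders; call it from code that declares IOException): build() parses and sorts every *.csv file in the input directory, encodes the entries through the writer, and adds one FST entry per unique surface form, with duplicate surface forms sharing the same ord via addMapping().

    TokenInfoDictionaryBuilder builder = new TokenInfoDictionaryBuilder("utf-8", true);
    TokenInfoDictionaryWriter writer = builder.build("/path/to/mecab-ko-dic"); // reads every *.csv in the directory
    writer.write("/path/to/output");  // writes the binary dictionary files and saves the FST alongside them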

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryWriter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryWriter.java b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryWriter.java
new file mode 100644
index 0000000..c1554d2
--- /dev/null
+++ b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryWriter.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.util;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.apache.lucene.analysis.ko.dict.TokenInfoDictionary;
+import org.apache.lucene.util.fst.FST;
+
+public class TokenInfoDictionaryWriter extends BinaryDictionaryWriter {
+  private FST<Long> fst;
+
+  public TokenInfoDictionaryWriter(int size) {
+    super(TokenInfoDictionary.class, size);
+  }
+  
+  public void setFST(FST<Long> fst) {
+    this.fst = fst;
+  }
+  
+  @Override
+  public void write(String baseDir) throws IOException {
+    super.write(baseDir);
+    writeFST(getBaseFileName(baseDir) + TokenInfoDictionary.FST_FILENAME_SUFFIX);
+  }
+  
+  protected void writeFST(String filename) throws IOException {
+    Path p = Paths.get(filename);
+    Files.createDirectories(p.getParent());
+    fst.save(p);
+  }  
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/UnknownDictionaryBuilder.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/UnknownDictionaryBuilder.java b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/UnknownDictionaryBuilder.java
new file mode 100644
index 0000000..a408866
--- /dev/null
+++ b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/UnknownDictionaryBuilder.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.util;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.LineNumberReader;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.lucene.analysis.ko.dict.CharacterDefinition;
+
+public class UnknownDictionaryBuilder {
+  private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,1798,3559,3677,SY,*,*,*,*,*,*,*";
+  
+  private String encoding = "utf-8";
+  
+  public UnknownDictionaryBuilder(String encoding) {
+    this.encoding = encoding;
+  }
+  
+  public UnknownDictionaryWriter build(String dirname) throws IOException {
+    UnknownDictionaryWriter unkDictionary = readDictionaryFile(dirname + File.separator + "unk.def");  //Should be only one file
+    readCharacterDefinition(dirname + File.separator + "char.def", unkDictionary);
+    return unkDictionary;
+  }
+  
+  public UnknownDictionaryWriter readDictionaryFile(String filename)
+      throws IOException {
+    return readDictionaryFile(filename, encoding);
+  }
+  
+  public UnknownDictionaryWriter readDictionaryFile(String filename, String encoding)
+      throws IOException {
+    UnknownDictionaryWriter dictionary = new UnknownDictionaryWriter(5 * 1024 * 1024);
+    
+    FileInputStream inputStream = new FileInputStream(filename);
+    Charset cs = Charset.forName(encoding);
+    CharsetDecoder decoder = cs.newDecoder()
+        .onMalformedInput(CodingErrorAction.REPORT)
+        .onUnmappableCharacter(CodingErrorAction.REPORT);
+    InputStreamReader streamReader = new InputStreamReader(inputStream, decoder);
+    LineNumberReader lineReader = new LineNumberReader(streamReader);
+    
+    dictionary.put(CSVUtil.parse(NGRAM_DICTIONARY_ENTRY));
+    
+    List<String[]> lines = new ArrayList<>();
+    String line = null;
+    while ((line = lineReader.readLine()) != null) {
+      // note: unk.def has only 10 fields; appending empty reading and pronunciation fields
+      // simplifies the writer, even though the unknown dictionary returns a hardcoded null for them.
+      final String[] parsed = CSVUtil.parse(line + ",*,*"); // Probably we don't need to validate entry
+      lines.add(parsed);
+    }
+    
+    Collections.sort(lines, new Comparator<String[]>() {
+      public int compare(String[] left, String[] right) {
+        int leftId = CharacterDefinition.lookupCharacterClass(left[0]);
+        int rightId = CharacterDefinition.lookupCharacterClass(right[0]);
+        return leftId - rightId;
+      }
+    });
+    
+    for (String[] entry : lines) {
+      dictionary.put(entry);
+    }
+    
+    return dictionary;
+  }
+  
+  public void readCharacterDefinition(String filename, UnknownDictionaryWriter dictionary) throws IOException {
+    FileInputStream inputStream = new FileInputStream(filename);
+    InputStreamReader streamReader = new InputStreamReader(inputStream, encoding);
+    LineNumberReader lineReader = new LineNumberReader(streamReader);
+    
+    String line = null;
+    
+    while ((line = lineReader.readLine()) != null) {
+      line = line.replaceAll("^\\s", "");
+      line = line.replaceAll("\\s*#.*", "");
+      line = line.replaceAll("\\s+", " ");
+      
+      // Skip empty line or comment line
+      if(line.length() == 0) {
+        continue;
+      }
+      
+      if(line.startsWith("0x")) {  // Category mapping
+        String[] values = line.split(" ", 2);  // Split only first space
+        
+        if(!values[0].contains("..")) {
+          int cp = Integer.decode(values[0]).intValue();
+          dictionary.putCharacterCategory(cp, values[1]);
+        } else {
+          String[] codePoints = values[0].split("\\.\\.");
+          int cpFrom = Integer.decode(codePoints[0]).intValue();
+          int cpTo = Integer.decode(codePoints[1]).intValue();
+          
+          for(int i = cpFrom; i <= cpTo; i++){
+            dictionary.putCharacterCategory(i, values[1]);
+          }
+        }
+      } else {  // Invoke definition
+        String[] values = line.split(" "); // Consecutive space is merged above
+        String characterClassName = values[0];
+        int invoke = Integer.parseInt(values[1]);
+        int group = Integer.parseInt(values[2]);
+        int length = Integer.parseInt(values[3]);
+        dictionary.putInvokeDefinition(characterClassName, invoke, group, length);
+      }
+    }
+  }
+}
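
readCharacterDefinition() above accepts two kinds of non-comment lines: category mappings that start with "0x" and bind either a single code point or a "0xAC00..0xD7A3"-style range to a character class, and invoke definitions of the form "CLASS invoke group length". A hedged sketch of the equivalent writer calls (the numeric values are illustrative, not taken from the real char.def):

    UnknownDictionaryWriter unk = new UnknownDictionaryWriter(5 * 1024 * 1024);
    unk.putInvokeDefinition("HANGUL", 1, 1, 0);      // e.g. a line "HANGUL 1 1 0"
    unk.putCharacterCategory(0x41, "ALPHA");         // e.g. a line "0x0041 ALPHA"
    for (int cp = 0xAC00; cp <= 0xD7A3; cp++) {      // e.g. a range line "0xAC00..0xD7A3 HANGUL"
      unk.putCharacterCategory(cp, "HANGUL");
    }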

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/UnknownDictionaryWriter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/UnknownDictionaryWriter.java b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/UnknownDictionaryWriter.java
new file mode 100644
index 0000000..ff98a8d
--- /dev/null
+++ b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/UnknownDictionaryWriter.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.util;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.ko.dict.CharacterDefinition;
+import org.apache.lucene.analysis.ko.dict.UnknownDictionary;
+
+public class UnknownDictionaryWriter extends BinaryDictionaryWriter {
+
+  private final CharacterDefinitionWriter characterDefinition = new CharacterDefinitionWriter();
+
+  public UnknownDictionaryWriter(int size) {
+    super(UnknownDictionary.class, size);
+  }
+  
+  @Override
+  public int put(String[] entry) {
+    // Get wordId of current entry
+    int wordId = buffer.position();
+    
+    // Put entry
+    int result = super.put(entry);
+    
+    // Put entry in targetMap
+    int characterId = CharacterDefinition.lookupCharacterClass(entry[0]);
+    addMapping(characterId, wordId);
+    return result;
+  }
+  
+  /**
+   * Put mapping from unicode code point to character class.
+   * 
+   * @param codePoint code point
+   * @param characterClassName character class name
+   */
+  public void putCharacterCategory(int codePoint, String characterClassName) {
+    characterDefinition.putCharacterCategory(codePoint, characterClassName);
+  }
+  
+  public void putInvokeDefinition(String characterClassName, int invoke, int group, int length) {
+    characterDefinition.putInvokeDefinition(characterClassName, invoke, group, length);
+  }
+  
+  @Override
+  public void write(String baseDir) throws IOException {
+    super.write(baseDir);
+    characterDefinition.write(baseDir);
+  }
+}


[11/46] lucene-solr:jira/solr-11833: LUCENE-8245: Use strict bounds checking for edge planes when assessing envelope crossings. It's the only way to ensure we don't overdetect or underdetect such intersections.

Posted by ab...@apache.org.
LUCENE-8245: Use strict bounds checking for edge planes when assessing envelope crossings.  It's the only way to ensure we don't overdetect or underdetect such intersections.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/832e8974
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/832e8974
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/832e8974

Branch: refs/heads/jira/solr-11833
Commit: 832e89748ea97e262437d420f54aac2a1b87b505
Parents: 017f59b
Author: Karl Wright <Da...@gmail.com>
Authored: Thu Apr 12 07:08:51 2018 -0400
Committer: Karl Wright <Da...@gmail.com>
Committed: Thu Apr 12 07:08:51 2018 -0400

----------------------------------------------------------------------
 .../spatial3d/geom/GeoComplexPolygon.java       | 58 +++++++++++---------
 .../lucene/spatial3d/geom/SidedPlane.java       | 12 ++++
 .../lucene/spatial3d/geom/GeoPolygonTest.java   |  2 -
 .../spatial3d/geom/RandomGeoPolygonTest.java    |  4 +-
 4 files changed, 48 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/832e8974/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
index 487a771..c4e2c93 100644
--- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
+++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
@@ -971,12 +971,14 @@ class GeoComplexPolygon extends GeoBasePolygon {
       */
     private int countCrossings(final Edge edge,
       final Plane envelopePlane, final Membership envelopeBound) {
-      final GeoPoint[] intersections = edge.plane.findIntersections(planetModel, envelopePlane, edge.startPlane, edge.endPlane, envelopeBound);
+      final GeoPoint[] intersections = edge.plane.findIntersections(planetModel, envelopePlane, envelopeBound);
       int crossings = 0;
       if (intersections != null) {
         for (final GeoPoint intersection : intersections) {
-          // It's unique, so assess it
-          crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
+          if (edge.startPlane.strictlyWithin(intersection) && edge.endPlane.strictlyWithin(intersection)) {
+            // It's unique, so assess it
+            crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
+          }
         }
       }
       return crossings;
@@ -1062,12 +1064,14 @@ class GeoComplexPolygon extends GeoBasePolygon {
       */
     private int countCrossings(final Edge edge,
       final Plane envelopePlane, final Membership envelopeBound1, final Membership envelopeBound2) {
-      final GeoPoint[] intersections = edge.plane.findIntersections(planetModel, envelopePlane, edge.startPlane, edge.endPlane, envelopeBound1, envelopeBound2);
+      final GeoPoint[] intersections = edge.plane.findIntersections(planetModel, envelopePlane, envelopeBound1, envelopeBound2);
       int crossings = 0;
       if (intersections != null) {
         for (final GeoPoint intersection : intersections) {
-          // It's unique, so assess it
-          crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
+          if (edge.startPlane.strictlyWithin(intersection) && edge.endPlane.strictlyWithin(intersection)) {
+            // It's unique, so assess it
+            crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
+          }
         }
       }
       return crossings;
@@ -1381,35 +1385,39 @@ class GeoComplexPolygon extends GeoBasePolygon {
     private int countCrossings(final Edge edge,
       final Plane travelEnvelopePlane, final Membership travelEnvelopeBound1, final Membership travelEnvelopeBound2,
       final Plane testPointEnvelopePlane, final Membership testPointEnvelopeBound1, final Membership testPointEnvelopeBound2) {
-      final GeoPoint[] travelIntersections = edge.plane.findIntersections(planetModel, travelEnvelopePlane, edge.startPlane, edge.endPlane, travelEnvelopeBound1, travelEnvelopeBound2);
-      final GeoPoint[] testPointIntersections = edge.plane.findIntersections(planetModel, testPointEnvelopePlane, edge.startPlane, edge.endPlane, testPointEnvelopeBound1, testPointEnvelopeBound2);
+      final GeoPoint[] travelIntersections = edge.plane.findIntersections(planetModel, travelEnvelopePlane, travelEnvelopeBound1, travelEnvelopeBound2);
+      final GeoPoint[] testPointIntersections = edge.plane.findIntersections(planetModel, testPointEnvelopePlane, testPointEnvelopeBound1, testPointEnvelopeBound2);
       int crossings = 0;
       if (travelIntersections != null) {
         for (final GeoPoint intersection : travelIntersections) {
-          // Make sure it's not a dup
-          boolean notDup = true;
-          if (testPointIntersections != null) {
-            for (final GeoPoint otherIntersection : testPointIntersections) {
-              if (intersection.isNumericallyIdentical(otherIntersection)) {
-                //System.out.println("  Points "+intersection+" and "+otherIntersection+" are duplicates");
-                notDup = false;
-                break;
+          if (edge.startPlane.strictlyWithin(intersection) && edge.endPlane.strictlyWithin(intersection)) {
+            // Make sure it's not a dup
+            boolean notDup = true;
+            if (testPointIntersections != null) {
+              for (final GeoPoint otherIntersection : testPointIntersections) {
+                if (edge.startPlane.strictlyWithin(otherIntersection) && edge.endPlane.strictlyWithin(otherIntersection) && intersection.isNumericallyIdentical(otherIntersection)) {
+                  //System.out.println("  Points "+intersection+" and "+otherIntersection+" are duplicates");
+                  notDup = false;
+                  break;
+                }
               }
             }
+            if (!notDup) {
+              continue;
+            }
+            // It's unique, so assess it
+            //System.out.println("  Assessing travel envelope intersection point "+intersection+"...");
+            crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
           }
-          if (!notDup) {
-            continue;
-          }
-          // It's unique, so assess it
-          //System.out.println("  Assessing travel envelope intersection point "+intersection+"...");
-          crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
         }
       }
       if (testPointIntersections != null) {
         for (final GeoPoint intersection : testPointIntersections) {
-          // It's unique, so assess it
-          //System.out.println("  Assessing testpoint envelope intersection point "+intersection+"...");
-          crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
+          if (edge.startPlane.strictlyWithin(intersection) && edge.endPlane.strictlyWithin(intersection)) {
+            // It's unique, so assess it
+            //System.out.println("  Assessing testpoint envelope intersection point "+intersection+"...");
+            crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
+          }
         }
       }
       return crossings;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/832e8974/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/SidedPlane.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/SidedPlane.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/SidedPlane.java
index 66e9376..238933c 100755
--- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/SidedPlane.java
+++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/SidedPlane.java
@@ -234,6 +234,18 @@ public class SidedPlane extends Plane implements Membership {
     return sigNum == this.sigNum;
   }
 
+  public boolean strictlyWithin(final Vector v) {
+    double evalResult = evaluate(v.x, v.y, v.z);
+    double sigNum = Math.signum(evalResult);
+    return sigNum == 0.0 || sigNum == this.sigNum;
+  }
+
+  public boolean strictlyWithin(double x, double y, double z) {
+    double evalResult = evaluate(x, y, z);
+    double sigNum = Math.signum(evalResult);
+    return sigNum == 0.0 || sigNum == this.sigNum;
+  }
+
   @Override
   public boolean equals(Object o) {
     if (this == o) return true;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/832e8974/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
index cd65018..adff16c 100755
--- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
+++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
@@ -1428,7 +1428,6 @@ shape:
   }
 
   @Test
-  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8245")
   public void testComplexPolygonPlaneOutsideWorld() {
     List<GeoPoint> points = new ArrayList<>();
     points.add(new GeoPoint(PlanetModel.SPHERE, -0.5, -0.5));
@@ -1503,7 +1502,6 @@ shape:
   }
 
   @Test
-  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8245")
   public void testLUCENE8245() {
     //POLYGON((-70.19447784626787 -83.117346007187,0.0 2.8E-322,-139.99870438810106 7.994601469571884,-143.14292702670522 -18.500141088122664,-158.7373186858464 -35.42942085357812,-70.19447784626787 -83.117346007187))
     final List<GeoPoint> points = new ArrayList<>();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/832e8974/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
index 93d675a..44d682c 100644
--- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
+++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
@@ -24,6 +24,8 @@ import com.carrotsearch.randomizedtesting.annotations.Repeat;
 import com.carrotsearch.randomizedtesting.generators.BiasedNumbers;
 import org.junit.Test;
 
+import static com.carrotsearch.randomizedtesting.RandomizedTest.randomDouble;
+
 /**
  * Random test for polygons.
  */
@@ -92,7 +94,7 @@ public class RandomGeoPolygonTest extends RandomGeo3dShapeGenerator {
    * biased doubles.
    */
   @Test
-  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8245")
+  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8245")
   @Repeat(iterations = 10)
   public void testComparePolygons() {
     final PlanetModel planetModel = randomPlanetModel();


[15/46] lucene-solr:jira/solr-11833: LUCENE-8245: fix unused import

Posted by ab...@apache.org.
LUCENE-8245: fix unused import


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/4f694d5c
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/4f694d5c
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/4f694d5c

Branch: refs/heads/jira/solr-11833
Commit: 4f694d5c7259355e7b3c20f5ceef2eb63e50c893
Parents: 7a57ca8c0
Author: David Smiley <ds...@apache.org>
Authored: Thu Apr 12 12:30:09 2018 -0400
Committer: David Smiley <ds...@apache.org>
Committed: Thu Apr 12 12:30:09 2018 -0400

----------------------------------------------------------------------
 .../org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java     | 2 --
 1 file changed, 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4f694d5c/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
index 44d682c..ed2261b 100644
--- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
+++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
@@ -24,8 +24,6 @@ import com.carrotsearch.randomizedtesting.annotations.Repeat;
 import com.carrotsearch.randomizedtesting.generators.BiasedNumbers;
 import org.junit.Test;
 
-import static com.carrotsearch.randomizedtesting.RandomizedTest.randomDouble;
-
 /**
  * Random test for polygons.
  */


[24/46] lucene-solr:jira/solr-11833: LUCENE-8231: Add a new analysis module (nori) similar to Kuromoji but to handle Korean

Posted by ab...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/tools/test/org/apache/lucene/analysis/ko/dict/UnknownDictionaryTest.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/tools/test/org/apache/lucene/analysis/ko/dict/UnknownDictionaryTest.java b/lucene/analysis/nori/src/tools/test/org/apache/lucene/analysis/ko/dict/UnknownDictionaryTest.java
new file mode 100644
index 0000000..2bba714
--- /dev/null
+++ b/lucene/analysis/nori/src/tools/test/org/apache/lucene/analysis/ko/dict/UnknownDictionaryTest.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.dict;
+
+
+import org.apache.lucene.analysis.ko.util.CSVUtil;
+import org.apache.lucene.analysis.ko.util.UnknownDictionaryWriter;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.Test;
+
+public class UnknownDictionaryTest extends LuceneTestCase {
+
+  @Test
+  public void testPutCharacterCategory() {
+    UnknownDictionaryWriter unkDic = new UnknownDictionaryWriter(10 * 1024 * 1024);
+    
+    try{
+      unkDic.putCharacterCategory(0, "DUMMY_NAME");
+      fail();
+    } catch(Exception e) {
+      // expected
+    }
+    
+    try{
+      unkDic.putCharacterCategory(-1, "HANGUL");
+      fail();
+    } catch(Exception e) {
+      // expected
+    }
+    
+    unkDic.putCharacterCategory(0, "DEFAULT");
+    unkDic.putCharacterCategory(1, "GREEK");
+    unkDic.putCharacterCategory(2, "HANJA");
+    unkDic.putCharacterCategory(3, "HANGUL");
+    unkDic.putCharacterCategory(4, "KANJI");
+  }
+  
+  @Test
+  public void testPut() {
+    UnknownDictionaryWriter unkDic = new UnknownDictionaryWriter(10 * 1024 * 1024);
+    try{
+      unkDic.put(CSVUtil.parse("HANGUL,1800,3562,UNKNOWN,*,*,*,*,*,*,*"));
+      fail();
+    } catch(Exception e){
+      // expected
+    }
+
+    String entry1 = "ALPHA,1793,3533,795,SL,*,*,*,*,*,*,*";
+    String entry2 = "HANGUL,1800,3562,10247,UNKNOWN,*,*,*,*,*,*,*";
+    String entry3 = "HANJA,1792,3554,-821,SH,*,*,*,*,*,*,*";
+
+    unkDic.putCharacterCategory(0, "ALPHA");
+    unkDic.putCharacterCategory(1, "HANGUL");
+    unkDic.putCharacterCategory(2, "HANJA");
+    
+    unkDic.put(CSVUtil.parse(entry1));
+    unkDic.put(CSVUtil.parse(entry2));
+    unkDic.put(CSVUtil.parse(entry3));
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/ivy-versions.properties
----------------------------------------------------------------------
diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties
index f19e33d..14e7194 100644
--- a/lucene/ivy-versions.properties
+++ b/lucene/ivy-versions.properties
@@ -85,6 +85,7 @@ io.prometheus.version = 0.2.0
 /junit/junit = 4.10
 
 /mecab/mecab-ipadic = 2.7.0-20070801
+/mecab/mecab-ko-dic = 2.0.3-20170922
 /mecab/mecab-naist-jdic = 0.6.3b-20111013
 /net.arnx/jsonic = 1.2.7
 /net.bytebuddy/byte-buddy = 1.6.2

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/module-build.xml
----------------------------------------------------------------------
diff --git a/lucene/module-build.xml b/lucene/module-build.xml
index 92ca60e..9f17c1c 100644
--- a/lucene/module-build.xml
+++ b/lucene/module-build.xml
@@ -451,6 +451,28 @@
     <property name="analyzers-morfologik-javadocs.uptodate" value="true"/>
   </target>
 
+  <property name="analyzers-nori.jar" value="${common.dir}/build/analysis/nori/lucene-analyzers-nori-${version}.jar"/>
+  <target name="check-analyzers-nori-uptodate" unless="analyzers-nori.uptodate">
+    <module-uptodate name="analysis/nori" jarfile="${analyzers-nori.jar}" property="analyzers-nori.uptodate"/>
+  </target>
+  <target name="jar-analyzers-nori" unless="analyzers-nori.uptodate" depends="check-analyzers-nori-uptodate">
+    <ant dir="${common.dir}/analysis/nori" target="jar-core" inheritAll="false">
+      <propertyset refid="uptodate.and.compiled.properties"/>
+    </ant>
+    <property name="analyzers-nori.uptodate" value="true"/>
+  </target>
+
+  <property name="analyzers-nori-javadoc.jar" value="${common.dir}/build/analysis/nori/lucene-analyzers-nori-${version}-javadoc.jar"/>
+  <target name="check-analyzers-nori-javadocs-uptodate" unless="analyzers-nori-javadocs.uptodate">
+    <module-uptodate name="analysis/nori" jarfile="${analyzers-nori-javadoc.jar}" property="analyzers-nori-javadocs.uptodate"/>
+  </target>
+  <target name="javadocs-analyzers-nori" unless="analyzers-nori-javadocs.uptodate" depends="check-analyzers-nori-javadocs-uptodate">
+    <ant dir="${common.dir}/analysis/nori" target="javadocs" inheritAll="false">
+      <propertyset refid="uptodate.and.compiled.properties"/>
+    </ant>
+    <property name="analyzers-nori-javadocs.uptodate" value="true"/>
+  </target>
+
   <property name="codecs.jar" value="${common.dir}/build/codecs/lucene-codecs-${version}.jar"/>
   <target name="check-codecs-uptodate" unless="codecs.uptodate">
     <module-uptodate name="codecs" jarfile="${codecs.jar}" property="codecs.uptodate"/>


[26/46] lucene-solr:jira/solr-11833: LUCENE-8231: Add a new analysis module (nori) similar to Kuromoji but to handle Korean

Posted by ab...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java
new file mode 100644
index 0000000..43a02d3
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.lucene.analysis.ko.dict.UserDictionary;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+
+/**
+ * Factory for {@link KoreanTokenizer}.
+ * @lucene.experimental
+ */
+public class KoreanTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {
+  private static final String USER_DICT_PATH = "userDictionary";
+  private static final String USER_DICT_ENCODING = "userDictionaryEncoding";
+  private static final String DECOMPOUND_MODE = "decompoundMode";
+  private static final String OUTPUT_UNKNOWN_UNIGRAMS = "outputUnknownUnigrams";
+
+  private final String userDictionaryPath;
+  private final String userDictionaryEncoding;
+  private UserDictionary userDictionary;
+
+  private final KoreanTokenizer.DecompoundMode mode;
+  private final boolean outputUnknownUnigrams;
+
+  /** Creates a new KoreanTokenizerFactory */
+  public KoreanTokenizerFactory(Map<String, String> args) {
+    super(args);
+    userDictionaryPath = args.remove(USER_DICT_PATH);
+    userDictionaryEncoding = args.remove(USER_DICT_ENCODING);
+    mode = KoreanTokenizer.DecompoundMode.valueOf(get(args, DECOMPOUND_MODE, KoreanTokenizer.DEFAULT_DECOMPOUND.toString()).toUpperCase(Locale.ROOT));
+    outputUnknownUnigrams = getBoolean(args, OUTPUT_UNKNOWN_UNIGRAMS, false);
+
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+
+  @Override
+  public void inform(ResourceLoader loader) throws IOException {
+    if (userDictionaryPath != null) {
+      try (InputStream stream = loader.openResource(userDictionaryPath)) {
+        String encoding = userDictionaryEncoding;
+        if (encoding == null) {
+          encoding = IOUtils.UTF_8;
+        }
+        CharsetDecoder decoder = Charset.forName(encoding).newDecoder()
+          .onMalformedInput(CodingErrorAction.REPORT)
+          .onUnmappableCharacter(CodingErrorAction.REPORT);
+        Reader reader = new InputStreamReader(stream, decoder);
+        userDictionary = UserDictionary.open(reader);
+      }
+    } else {
+      userDictionary = null;
+    }
+  }
+
+  @Override
+  public KoreanTokenizer create(AttributeFactory factory) {
+    return new KoreanTokenizer(factory, userDictionary, mode, outputUnknownUnigrams);
+  }
+}
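
A hedged configuration sketch for the factory above, run from code that declares IOException. The parameter names come from the constants in this file; the "mixed" decompound mode value is an assumption about KoreanTokenizer.DecompoundMode (not shown in this patch), and ClasspathResourceLoader merely stands in for whatever ResourceLoader the hosting container supplies:

    Map<String, String> args = new HashMap<>();
    args.put("decompoundMode", "mixed");       // assumed enum constant; the factory upper-cases the value
    args.put("outputUnknownUnigrams", "true");
    KoreanTokenizerFactory factory = new KoreanTokenizerFactory(args);
    factory.inform(new ClasspathResourceLoader(KoreanTokenizerFactory.class)); // only loads a user dictionary if one was configured
    Tokenizer tokenizer = factory.create(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);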

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/POS.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/POS.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/POS.java
new file mode 100644
index 0000000..263c9c8
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/POS.java
@@ -0,0 +1,304 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+import java.util.Locale;
+
+/**
+ * Part of speech classification for Korean based on Sejong corpus classification.
+ * The list of tags and their meanings is available here:
+ * https://docs.google.com/spreadsheets/d/1-9blXKjtjeKZqsf4NzHeYJCrr49-nXeRF6D80udfcwY
+ */
+public class POS {
+
+  /**
+   * The type of the token.
+   */
+  public enum Type {
+    /**
+     * A simple morpheme.
+     */
+    MORPHEME,
+
+    /**
+     * Compound noun.
+     */
+    COMPOUND,
+
+    /**
+     * Inflected token.
+     */
+    INFLECT,
+
+    /**
+     * Pre-analysis token.
+     */
+    PREANALYSIS,
+  }
+
+  /**
+   * Part of speech tag for Korean based on Sejong corpus classification.
+   */
+  public enum Tag {
+    /**
+     * Verbal endings
+     */
+    E(100, "Verbal endings"),
+
+    /**
+     * Interjection
+     */
+    IC(110, "Interjection"),
+
+    /**
+     * Ending Particle
+     */
+    J(120, "Ending Particle"),
+
+    /**
+     * General Adverb
+     */
+    MAG(130, "General Adverb"),
+
+    /**
+     * Conjunctive adverb
+     */
+    MAJ(131, "Conjunctive adverb"),
+
+    /**
+     * Determiner
+     **/
+    MM(140, "Modifier"),
+
+    /**
+     * General Noun
+     **/
+    NNG(150, "General Noun"),
+
+    /**
+     * Proper Noun
+     **/
+    NNP(151, "Proper Noun"),
+
+    /**
+     * Dependent noun (following nouns)
+     **/
+    NNB(152, "Dependent noun"),
+
+    /**
+     * Dependent noun
+     **/
+    NNBC(153, "Dependent noun"),
+
+    /**
+     * Pronoun
+     **/
+    NP(154, "Pronoun"),
+
+    /**
+     * Numeral
+     **/
+    NR(155, "Numeral"),
+
+    /**
+     * Terminal punctuation (? ! .)
+     **/
+    SF(160, "Terminal punctuation"),
+
+    /**
+     * Chinese character
+     **/
+    SH(161, "Chinese Characeter"),
+
+    /**
+     * Foreign language
+     **/
+    SL(162, "Foreign language"),
+
+    /**
+     * Number
+     **/
+    SN(163, "Number"),
+
+    /**
+     * Space
+     **/
+    SP(164, "Space"),
+
+    /**
+     * Closing brackets
+     **/
+    SSC(165, "Closing brackets"),
+
+    /**
+     * Opening brackets
+     **/
+    SSO(166, "Opening brackets"),
+
+    /**
+     * Separator (· / :)
+     **/
+    SC(167, "Separator"),
+
+    /**
+     * Other symbol
+     **/
+    SY(168, "Other symbol"),
+
+    /**
+     * Ellipsis
+     **/
+    SE(169, "Ellipsis"),
+
+    /**
+     * Adjective
+     **/
+    VA(170, "Adjective"),
+
+    /**
+     * Negative designator
+     **/
+    VCN(171, "Negative designator"),
+
+    /**
+     * Positive designator
+     **/
+    VCP(172, "Positive designator"),
+
+    /**
+     * Verb
+     **/
+    VV(173, "Verb"),
+
+    /**
+     * Auxiliary Verb or Adjective
+     **/
+    VX(174, "Auxiliary Verb or Adjective"),
+
+    /**
+     * Prefix
+     **/
+    XPN(181, "Prefix"),
+
+    /**
+     * Root
+     **/
+    XR(182, "Root"),
+
+    /**
+     * Adjective Suffix
+     **/
+    XSA(183, "Adjective Suffix"),
+
+    /**
+     * Noun Suffix
+     **/
+    XSN(184, "Noun Suffix"),
+
+    /**
+     * Verb Suffix
+     **/
+    XSV(185, "Verb Suffix"),
+
+    /**
+     * Unknown
+     */
+    UNKNOWN(999, "Unknown"),
+
+    /**
+     * Unknown
+     */
+    UNA(-1, "Unknown"),
+
+    /**
+     * Unknown
+     */
+    NA(-1, "Unknown"),
+
+    /**
+     * Unknown
+     */
+    VSV(-1, "Unknown");
+
+    private final int code;
+    private final String desc;
+
+    /**
+     * Returns the code associated with the tag (as defined in pos-id.def).
+     */
+    public int code() {
+      return code;
+    }
+
+    /**
+     * Returns the description associated with the tag.
+     */
+    public String description() {
+      return desc;
+    }
+
+    /**
+     * Returns a new part of speech tag.
+     * @param code The code for the tag.
+     * @param desc The description of the tag.
+     */
+    Tag(int code, String desc) {
+      this.code = code;
+      this.desc = desc;
+    }
+  }
+
+  /**
+   * Returns the {@link Tag} of the provided <code>name</code>.
+   */
+  public static Tag resolveTag(String name) {
+    String tagUpper = name.toUpperCase(Locale.ENGLISH);
+    if (tagUpper.startsWith("J")) {
+      return Tag.J;
+    } else if (tagUpper.startsWith("E")) {
+      return Tag.E;
+    } else {
+      return Tag.valueOf(tagUpper);
+    }
+  }
+
+  /**
+   * Returns the {@link Tag} of the provided <code>tag</code>.
+   */
+  public static Tag resolveTag(byte tag) {
+    assert tag < Tag.values().length;
+    return Tag.values()[tag];
+  }
+
+  /**
+   * Returns the {@link Type} of the provided <code>name</code>.
+   */
+  public static Type resolveType(String name) {
+    if ("*".equals(name)) {
+      return Type.MORPHEME;
+    }
+    return Type.valueOf(name.toUpperCase(Locale.ENGLISH));
+  }
+
+  /**
+   * Returns the {@link Type} of the provided <code>type</code>.
+   */
+  public static Type resolveType(byte type) {
+    assert type < Type.values().length;
+    return Type.values()[type];
+  }
+}
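
A few hedged examples of the resolution helpers above:

    POS.Tag noun = POS.resolveTag("NNG");      // Tag.NNG, code 150 ("General Noun")
    POS.Tag particle = POS.resolveTag("JKS");  // any name starting with "J" collapses to Tag.J
    POS.Tag ending = POS.resolveTag("ef");     // case-insensitive; names starting with "E" collapse to Tag.E
    POS.Type simple = POS.resolveType("*");    // "*" marks a plain MORPHEME entry
    POS.Type compound = POS.resolveType("Compound");  // otherwise resolved by the upper-cased enum name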

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Token.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Token.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Token.java
new file mode 100644
index 0000000..bf2c528
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Token.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+import org.apache.lucene.analysis.ko.dict.Dictionary.Morpheme;
+
+/**
+ * Analyzed token with morphological data.
+ */
+public abstract class Token {
+  private final char[] surfaceForm;
+  private final int offset;
+  private final int length;
+
+  private final int startOffset;
+  private final int endOffset;
+  private int posIncr = 1;
+  private int posLen = 1;
+
+  public Token(char[] surfaceForm, int offset, int length, int startOffset, int endOffset) {
+    this.surfaceForm = surfaceForm;
+    this.offset = offset;
+    this.length = length;
+
+    this.startOffset = startOffset;
+    this.endOffset = endOffset;
+  }
+
+  /**
+   * @return surfaceForm
+   */
+  public char[] getSurfaceForm() {
+    return surfaceForm;
+  }
+
+  /**
+   * @return offset into surfaceForm
+   */
+  public int getOffset() {
+    return offset;
+  }
+
+  /**
+   * @return length of surfaceForm
+   */
+  public int getLength() {
+    return length;
+  }
+
+  /**
+   * @return surfaceForm as a String
+   */
+  public String getSurfaceFormString() {
+    return new String(surfaceForm, offset, length);
+  }
+
+  /**
+   * Get the {@link POS.Type} of the token.
+   */
+  public abstract POS.Type getPOSType();
+
+  /**
+   * Get the left part of speech of the token.
+   */
+  public abstract POS.Tag getLeftPOS();
+
+  /**
+   * Get the right part of speech of the token.
+   */
+  public abstract POS.Tag getRightPOS();
+
+  /**
+   * Get the reading of the token.
+   */
+  public abstract String getReading();
+
+  /**
+   * Get the {@link Morpheme} decomposition of the token.
+   */
+  public abstract Morpheme[] getMorphemes();
+
+  /**
+   * Get the start offset of the term in the analyzed text.
+   */
+  public int getStartOffset() {
+    return startOffset;
+  }
+
+  /**
+   * Get the end offset of the term in the analyzed text.
+   */
+  public int getEndOffset() {
+    return endOffset;
+  }
+
+  public void setPositionIncrement(int posIncr) {
+    this.posIncr = posIncr;
+  }
+
+  public int getPositionIncrement() {
+    return posIncr;
+  }
+
+  public void setPositionLength(int posLen) {
+    this.posLen = posLen;
+  }
+
+  public int getPositionLength() {
+    return posLen;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java
new file mode 100644
index 0000000..b7a3612
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java
@@ -0,0 +1,239 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.dict;
+
+import java.io.BufferedInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.channels.Channels;
+import java.nio.channels.ReadableByteChannel;
+
+import org.apache.lucene.analysis.ko.POS;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.IntsRef;
+
+/**
+ * Base class for a binary-encoded in-memory dictionary.
+ */
+public abstract class BinaryDictionary implements Dictionary {
+  public static final String TARGETMAP_FILENAME_SUFFIX = "$targetMap.dat";
+  public static final String DICT_FILENAME_SUFFIX = "$buffer.dat";
+  public static final String POSDICT_FILENAME_SUFFIX = "$posDict.dat";
+
+  public static final String DICT_HEADER = "ko_dict";
+  public static final String TARGETMAP_HEADER = "ko_dict_map";
+  public static final String POSDICT_HEADER = "ko_dict_pos";
+  public static final int VERSION = 1;
+
+  private final ByteBuffer buffer;
+  private final int[] targetMapOffsets, targetMap;
+  private final POS.Tag[] posDict;
+
+  protected BinaryDictionary() throws IOException {
+    InputStream mapIS = null, dictIS = null, posIS = null;
+    int[] targetMapOffsets = null, targetMap = null;
+    ByteBuffer buffer = null;
+    boolean success = false;
+    try {
+      mapIS = getResource(TARGETMAP_FILENAME_SUFFIX);
+      mapIS = new BufferedInputStream(mapIS);
+      DataInput in = new InputStreamDataInput(mapIS);
+      CodecUtil.checkHeader(in, TARGETMAP_HEADER, VERSION, VERSION);
+      targetMap = new int[in.readVInt()];
+      targetMapOffsets = new int[in.readVInt()];
+      int accum = 0, sourceId = 0;
+      for (int ofs = 0; ofs < targetMap.length; ofs++) {
+        final int val = in.readVInt();
+        if ((val & 0x01) != 0) {
+          targetMapOffsets[sourceId] = ofs;
+          sourceId++;
+        }
+        accum += val >>> 1;
+        targetMap[ofs] = accum;
+      }
+      if (sourceId + 1 != targetMapOffsets.length)
+        throw new IOException("targetMap file format broken");
+      targetMapOffsets[sourceId] = targetMap.length;
+      mapIS.close(); mapIS = null;
+
+      posIS = getResource(POSDICT_FILENAME_SUFFIX);
+      posIS = new BufferedInputStream(posIS);
+      in = new InputStreamDataInput(posIS);
+      CodecUtil.checkHeader(in, POSDICT_HEADER, VERSION, VERSION);
+      int posSize = in.readVInt();
+      posDict = new POS.Tag[posSize];
+      for (int j = 0; j < posSize; j++) {
+        posDict[j] = POS.resolveTag(in.readByte());
+      }
+      posIS.close(); posIS = null;
+
+      dictIS = getResource(DICT_FILENAME_SUFFIX);
+      // no buffering here, as we load in one large buffer
+      in = new InputStreamDataInput(dictIS);
+      CodecUtil.checkHeader(in, DICT_HEADER, VERSION, VERSION);
+      final int size = in.readVInt();
+      final ByteBuffer tmpBuffer = ByteBuffer.allocateDirect(size);
+      final ReadableByteChannel channel = Channels.newChannel(dictIS);
+      final int read = channel.read(tmpBuffer);
+      if (read != size) {
+        throw new EOFException("Cannot read whole dictionary");
+      }
+      dictIS.close(); dictIS = null;
+      buffer = tmpBuffer.asReadOnlyBuffer();
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(mapIS, dictIS);
+      } else {
+        IOUtils.closeWhileHandlingException(mapIS, dictIS);
+      }
+    }
+
+    this.targetMap = targetMap;
+    this.targetMapOffsets = targetMapOffsets;
+    this.buffer = buffer;
+  }
+  
+  protected final InputStream getResource(String suffix) throws IOException {
+    return getClassResource(getClass(), suffix);
+  }
+  
+  // util, reused by ConnectionCosts and CharacterDefinition
+  public static final InputStream getClassResource(Class<?> clazz, String suffix) throws IOException {
+    final InputStream is = clazz.getResourceAsStream(clazz.getSimpleName() + suffix);
+    if (is == null)
+      throw new FileNotFoundException("Not in classpath: " + clazz.getName().replace('.','/') + suffix);
+    return is;
+  }
+
+  public void lookupWordIds(int sourceId, IntsRef ref) {
+    ref.ints = targetMap;
+    ref.offset = targetMapOffsets[sourceId];
+    // targetMapOffsets always has one more entry pointing behind last:
+    ref.length = targetMapOffsets[sourceId + 1] - ref.offset;
+  }
+
+  @Override
+  public int getLeftId(int wordId) {
+    return buffer.getShort(wordId) >>> 2;
+  }
+  
+  @Override
+  public int getRightId(int wordId) {
+    return buffer.getShort(wordId+2) >>> 2; // Skip left id
+  }
+  
+  @Override
+  public int getWordCost(int wordId) {
+    return buffer.getShort(wordId + 4);  // Skip left and right id
+  }
+
+  @Override
+  public POS.Type getPOSType(int wordId) {
+    byte value = (byte) (buffer.getShort(wordId) & 3);
+    return POS.resolveType(value);
+  }
+
+  @Override
+  public POS.Tag getLeftPOS(int wordId) {
+    return posDict[getLeftId(wordId)];
+  }
+
+  @Override
+  public POS.Tag getRightPOS(int wordId) {
+    POS.Type type = getPOSType(wordId);
+    if (type == POS.Type.MORPHEME || type == POS.Type.COMPOUND || hasSinglePOS(wordId)) {
+      return getLeftPOS(wordId);
+    } else {
+      byte value = buffer.get(wordId + 6);
+      return POS.resolveTag(value);
+    }
+  }
+
+  @Override
+  public String getReading(int wordId) {
+    if (hasReadingData(wordId)) {
+      int offset = wordId + 6;
+      return readString(offset);
+    }
+    return null;
+  }
+
+  @Override
+  public Morpheme[] getMorphemes(int wordId, char[] surfaceForm, int off, int len) {
+    POS.Type posType = getPOSType(wordId);
+    if (posType == POS.Type.MORPHEME) {
+      return null;
+    }
+    int offset = wordId + 6;
+    boolean hasSinglePos = hasSinglePOS(wordId);
+    if (hasSinglePos == false) {
+      offset ++; // skip rightPOS
+    }
+    int length = buffer.get(offset++);
+    if (length == 0) {
+      return null;
+    }
+    Morpheme[] morphemes = new Morpheme[length];
+    int surfaceOffset = 0;
+    final POS.Tag leftPOS = getLeftPOS(wordId);
+    for (int i = 0; i < length; i++) {
+      final String form;
+      final POS.Tag tag = hasSinglePos ? leftPOS : POS.resolveTag(buffer.get(offset++));
+      if (posType == POS.Type.INFLECT) {
+        form = readString(offset);
+        offset += form.length() * 2 + 1;
+      } else {
+        int formLen = buffer.get(offset++);
+        form = new String(surfaceForm, off+surfaceOffset, formLen);
+        surfaceOffset += formLen;
+      }
+      morphemes[i] = new Morpheme(tag, form);
+    }
+    return morphemes;
+  }
+
+  private String readString(int offset) {
+    int strOffset = offset;
+    int len = buffer.get(strOffset++);
+    char text[] = new char[len];
+    for (int i = 0; i < len; i++) {
+      text[i] = buffer.getChar(strOffset + (i<<1));
+    }
+    return new String(text);
+  }
+
+  private boolean hasSinglePOS(int wordId) {
+    return (buffer.getShort(wordId+2) & HAS_SINGLE_POS) != 0;
+  }
+
+  private boolean hasReadingData(int wordId) {
+    return (buffer.getShort(wordId+2) & HAS_READING) != 0;
+  }
+
+  /** flag that the entry has a single part of speech (leftPOS) */
+  public static final int HAS_SINGLE_POS = 1;
+
+  /** flag that the entry has reading data. otherwise reading is surface form */
+  public static final int HAS_READING = 2;
+}
\ No newline at end of file
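
For readers following the buffer layout above: the accessors decode bit-packed shorts from the entry at wordId. The low two bits of the first short carry the POS type, the low two bits of the second short carry the HAS_SINGLE_POS and HAS_READING flags, and the remaining high bits hold the left and right connection ids; the third short is the raw word cost. The following self-contained sketch (all values invented, only the packing scheme mirrors the code) shows that layout in isolation:

// Minimal illustration of the bit layout assumed by the accessors above.
// The numbers are made up; only the packing scheme mirrors BinaryDictionary.
import java.nio.ByteBuffer;

public class PackedEntryDemo {
  public static void main(String[] args) {
    int leftId = 1781, rightId = 3534, wordCost = -3000;
    int posType = 2;   // stored in the low 2 bits of the left-id short
    int flags = 1;     // HAS_SINGLE_POS, stored in the low 2 bits of the right-id short

    ByteBuffer buffer = ByteBuffer.allocate(6);
    buffer.putShort((short) ((leftId << 2) | posType));
    buffer.putShort((short) ((rightId << 2) | flags));
    buffer.putShort((short) wordCost);

    int wordId = 0;    // byte offset of this entry in the buffer
    System.out.println("leftId  = " + (buffer.getShort(wordId) >>> 2));      // 1781
    System.out.println("posType = " + (buffer.getShort(wordId) & 3));        // 2
    System.out.println("rightId = " + (buffer.getShort(wordId + 2) >>> 2));  // 3534
    System.out.println("flags   = " + (buffer.getShort(wordId + 2) & 3));    // 1
    System.out.println("cost    = " + buffer.getShort(wordId + 4));          // -3000
  }
}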

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CharacterDefinition.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CharacterDefinition.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CharacterDefinition.java
new file mode 100644
index 0000000..bc81cba
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CharacterDefinition.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.dict;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Character category data.
+ */
+public final class CharacterDefinition {
+
+  public static final String FILENAME_SUFFIX = ".dat";
+  public static final String HEADER = "ko_cd";
+  public static final int VERSION = 1;
+
+  public static final int CLASS_COUNT = CharacterClass.values().length;
+
+  // only used internally for lookup:
+  private enum CharacterClass {
+    NGRAM, DEFAULT, SPACE, SYMBOL, NUMERIC, ALPHA, CYRILLIC, GREEK, HIRAGANA, KATAKANA, KANJI, HANGUL, HANJA, HANJANUMERIC;
+  }
+
+  private final byte[] characterCategoryMap = new byte[0x10000];
+
+  private final boolean[] invokeMap = new boolean[CLASS_COUNT];
+  private final boolean[] groupMap = new boolean[CLASS_COUNT];
+
+  // the classes:
+  public static final byte NGRAM = (byte) CharacterClass.NGRAM.ordinal();
+  public static final byte DEFAULT = (byte) CharacterClass.DEFAULT.ordinal();
+  public static final byte SPACE = (byte) CharacterClass.SPACE.ordinal();
+  public static final byte SYMBOL = (byte) CharacterClass.SYMBOL.ordinal();
+  public static final byte NUMERIC = (byte) CharacterClass.NUMERIC.ordinal();
+  public static final byte ALPHA = (byte) CharacterClass.ALPHA.ordinal();
+  public static final byte CYRILLIC = (byte) CharacterClass.CYRILLIC.ordinal();
+  public static final byte GREEK = (byte) CharacterClass.GREEK.ordinal();
+  public static final byte HIRAGANA = (byte) CharacterClass.HIRAGANA.ordinal();
+  public static final byte KATAKANA = (byte) CharacterClass.KATAKANA.ordinal();
+  public static final byte KANJI = (byte) CharacterClass.KANJI.ordinal();
+  public static final byte HANGUL = (byte) CharacterClass.HANGUL.ordinal();
+  public static final byte HANJA = (byte) CharacterClass.HANJA.ordinal();
+  public static final byte HANJANUMERIC = (byte) CharacterClass.HANJANUMERIC.ordinal();
+  
+  private CharacterDefinition() throws IOException {
+    InputStream is = null;
+    boolean success = false;
+    try {
+      is = BinaryDictionary.getClassResource(getClass(), FILENAME_SUFFIX);
+      is = new BufferedInputStream(is);
+      final DataInput in = new InputStreamDataInput(is);
+      CodecUtil.checkHeader(in, HEADER, VERSION, VERSION);
+      in.readBytes(characterCategoryMap, 0, characterCategoryMap.length);
+      for (int i = 0; i < CLASS_COUNT; i++) {
+        final byte b = in.readByte();
+        invokeMap[i] = (b & 0x01) != 0;
+        groupMap[i] = (b & 0x02) != 0;
+      }
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(is);
+      } else {
+        IOUtils.closeWhileHandlingException(is);
+      }
+    }
+  }
+  
+  public byte getCharacterClass(char c) {
+    return characterCategoryMap[c];
+  }
+  
+  public boolean isInvoke(char c) {
+    return invokeMap[characterCategoryMap[c]];
+  }
+  
+  public boolean isGroup(char c) {
+    return groupMap[characterCategoryMap[c]];
+  }
+
+  public boolean isHanja(char c) {
+    final byte characterClass = getCharacterClass(c);
+    return characterClass == HANJA || characterClass == HANJANUMERIC;
+  }
+
+  public boolean isHangul(char c) {
+    return getCharacterClass(c) == HANGUL;
+  }
+
+  public boolean hasCoda(char ch){
+    if (((ch - 0xAC00) % 0x001C) == 0) {
+      return false;
+    } else {
+      return true;
+    }
+  }
+
+  public static byte lookupCharacterClass(String characterClassName) {
+    return (byte) CharacterClass.valueOf(characterClassName).ordinal();
+  }
+
+  public static CharacterDefinition getInstance() {
+    return SingletonHolder.INSTANCE;
+  }
+  
+  private static class SingletonHolder {
+    static final CharacterDefinition INSTANCE;
+    static {
+      try {
+        INSTANCE = new CharacterDefinition();
+      } catch (IOException ioe) {
+        throw new RuntimeException("Cannot load CharacterDefinition.", ioe);
+      }
+    }
+  }
+}
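
hasCoda() above leans on the arithmetic structure of the precomposed Hangul syllable block: every syllable is encoded as 0xAC00 + (lead * 21 + vowel) * 28 + coda, so (ch - 0xAC00) % 28 is the index of the trailing consonant, and an index of 0 means there is none. A small standalone check of that arithmetic (the sample characters are arbitrary):

// Sketch of the Unicode arithmetic behind hasCoda(); only meaningful for chars
// in the Hangul syllable block U+AC00..U+D7A3 (callers check isHangul first).
public class HangulCodaDemo {
  static boolean hasCoda(char ch) {
    return ((ch - 0xAC00) % 28) != 0;  // 28 possible codas, index 0 = "no coda"
  }

  public static void main(String[] args) {
    System.out.println(hasCoda('가'));  // false: (0xAC00 - 0xAC00) % 28 == 0
    System.out.println(hasCoda('강'));  // true:  (0xAC15 - 0xAC00) % 28 == 21 (ㅇ)
  }
}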

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java
new file mode 100644
index 0000000..95d0e8b
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.dict;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * n-gram connection cost data
+ */
+public final class ConnectionCosts {
+  
+  public static final String FILENAME_SUFFIX = ".dat";
+  public static final String HEADER = "ko_cc";
+  public static final int VERSION = 1;
+
+  private final ByteBuffer buffer;
+  private final int forwardSize;
+
+  private ConnectionCosts() throws IOException {
+    InputStream is = null;
+    ByteBuffer buffer = null;
+    boolean success = false;
+    try {
+      is = BinaryDictionary.getClassResource(getClass(), FILENAME_SUFFIX);
+      is = new BufferedInputStream(is);
+      final DataInput in = new InputStreamDataInput(is);
+      CodecUtil.checkHeader(in, HEADER, VERSION, VERSION);
+      this.forwardSize = in.readVInt();
+      int backwardSize = in.readVInt();
+      int size = forwardSize * backwardSize;
+
+      // copy the matrix into a direct byte buffer
+      final ByteBuffer tmpBuffer = ByteBuffer.allocateDirect(size*2);
+      int accum = 0;
+      for (int j = 0; j < backwardSize; j++) {
+        for (int i = 0; i < forwardSize; i++) {
+          accum += in.readZInt();
+          tmpBuffer.putShort((short) accum);
+        }
+      }
+      buffer = tmpBuffer.asReadOnlyBuffer();
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(is);
+      } else {
+        IOUtils.closeWhileHandlingException(is);
+      }
+    }
+    this.buffer = buffer;
+  }
+  
+  public int get(int forwardId, int backwardId) {
+    // map 2d matrix into a single dimension short array
+    int offset = (backwardId * forwardSize + forwardId) * 2;
+    return buffer.getShort(offset);
+  }
+  
+  public static ConnectionCosts getInstance() {
+    return SingletonHolder.INSTANCE;
+  }
+  
+  private static class SingletonHolder {
+    static final ConnectionCosts INSTANCE;
+    static {
+      try {
+        INSTANCE = new ConnectionCosts();
+      } catch (IOException ioe) {
+        throw new RuntimeException("Cannot load ConnectionCosts.", ioe);
+      }
+    }
+  }
+  
+}
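
get() above treats the connection-cost matrix as a single row-major block of 16-bit values, written by the constructor with the backward id as the outer loop (the constructor also accumulates the delta-encoded readZInt() values into absolute costs before storing them). A hedged sketch of the same indexing, with made-up sizes and costs:

// Illustration of the 2-D -> 1-D cost matrix indexing used by ConnectionCosts.get().
// Sizes and cost values are invented for the example.
import java.nio.ByteBuffer;

public class ConnectionCostDemo {
  public static void main(String[] args) {
    int forwardSize = 3, backwardSize = 2;
    short[][] costs = { {10, 20, 30}, {40, 50, 60} };  // costs[backwardId][forwardId]

    // Write in the same order as the constructor: outer backward, inner forward.
    ByteBuffer buffer = ByteBuffer.allocate(forwardSize * backwardSize * 2);
    for (int b = 0; b < backwardSize; b++) {
      for (int f = 0; f < forwardSize; f++) {
        buffer.putShort(costs[b][f]);
      }
    }

    // Read back with the same offset formula as get(forwardId, backwardId).
    int forwardId = 2, backwardId = 1;
    int offset = (backwardId * forwardSize + forwardId) * 2;
    System.out.println(buffer.getShort(offset));  // prints 60
  }
}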

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/Dictionary.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/Dictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/Dictionary.java
new file mode 100644
index 0000000..23101b3
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/Dictionary.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.dict;
+
+import org.apache.lucene.analysis.ko.POS.Tag;
+import org.apache.lucene.analysis.ko.POS.Type;
+
+/**
+ * Dictionary interface for retrieving morphological data
+ * by id.
+ */
+public interface Dictionary {
+  /**
+   * A morpheme extracted from a compound token.
+   */
+  class Morpheme {
+    public final Tag posTag;
+    public final String surfaceForm;
+
+    public Morpheme(Tag posTag, String surfaceForm) {
+      this.posTag = posTag;
+      this.surfaceForm = surfaceForm;
+    }
+  }
+
+  /**
+   * Get left id of specified word
+   */
+  int getLeftId(int wordId);
+  
+  /**
+   * Get right id of specified word
+   */
+  int getRightId(int wordId);
+  
+  /**
+   * Get word cost of specified word
+   */
+  int getWordCost(int wordId);
+
+  /**
+   * Get the {@link Type} of specified word (morpheme, compound, inflect or pre-analysis)
+   */
+  Type getPOSType(int wordId);
+
+  /**
+   * Get the left {@link Tag} of specified word.
+   *
+   * For {@link Type#MORPHEME} and {@link Type#COMPOUND} the left and right POS are the same.
+   */
+  Tag getLeftPOS(int wordId);
+
+  /**
+   * Get the right {@link Tag} of specified word.
+   *
+   * For {@link Type#MORPHEME} and {@link Type#COMPOUND} the left and right POS are the same.
+   */
+  Tag getRightPOS(int wordId);
+
+  /**
+   * Get the reading of specified word (mainly used for Hanja to Hangul conversion).
+   */
+  String getReading(int wordId);
+
+  /**
+   * Get the morphemes of specified word (e.g. 가깝으나: 가깝 + 으나).
+   */
+  Morpheme[] getMorphemes(int wordId, char[] surfaceForm, int off, int len);
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java
new file mode 100644
index 0000000..94408c7
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.dict;
+
+import java.io.BufferedInputStream;
+import java.io.InputStream;
+import java.io.IOException;
+
+import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+
+/**
+ * Binary dictionary implementation for a known-word dictionary model:
+ * Words are encoded into an FST mapping to a list of wordIDs.
+ */
+public final class TokenInfoDictionary extends BinaryDictionary {
+
+  public static final String FST_FILENAME_SUFFIX = "$fst.dat";
+
+  private final TokenInfoFST fst;
+  
+  private TokenInfoDictionary() throws IOException {
+    super();
+    InputStream is = null;
+    FST<Long> fst = null;
+    boolean success = false;
+    try {
+      is = getResource(FST_FILENAME_SUFFIX);
+      is = new BufferedInputStream(is);
+      fst = new FST<>(new InputStreamDataInput(is), PositiveIntOutputs.getSingleton());
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(is);
+      } else {
+        IOUtils.closeWhileHandlingException(is);
+      }
+    }
+    this.fst = new TokenInfoFST(fst);
+  }
+  
+  public TokenInfoFST getFST() {
+    return fst;
+  }
+   
+  public static TokenInfoDictionary getInstance() {
+    return SingletonHolder.INSTANCE;
+  }
+  
+  private static class SingletonHolder {
+    static final TokenInfoDictionary INSTANCE;
+    static {
+      try {
+        INSTANCE = new TokenInfoDictionary();
+      } catch (IOException ioe) {
+        throw new RuntimeException("Cannot load TokenInfoDictionary.", ioe);
+      }
+    }
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoFST.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoFST.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoFST.java
new file mode 100644
index 0000000..7f9bec6
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoFST.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.dict;
+
+import java.io.IOException;
+
+import org.apache.lucene.util.fst.FST.Arc;
+import org.apache.lucene.util.fst.FST;
+
+/**
+ * Thin wrapper around an FST with root-arc caching for Hangul syllables (11,172 arcs).
+ */
+public final class TokenInfoFST {
+  private final FST<Long> fst;
+
+  private final int cacheCeiling;
+  private final FST.Arc<Long> rootCache[];
+  
+  public final Long NO_OUTPUT;
+
+  public TokenInfoFST(FST<Long> fst) throws IOException {
+    this.fst = fst;
+    this.cacheCeiling = 0xD7A3;
+    NO_OUTPUT = fst.outputs.getNoOutput();
+    rootCache = cacheRootArcs();
+  }
+  
+  @SuppressWarnings({"rawtypes","unchecked"})
+  private FST.Arc<Long>[] cacheRootArcs() throws IOException {
+    FST.Arc<Long> rootCache[] = new FST.Arc[1+(cacheCeiling-0xAC00)];
+    FST.Arc<Long> firstArc = new FST.Arc<>();
+    fst.getFirstArc(firstArc);
+    FST.Arc<Long> arc = new FST.Arc<>();
+    final FST.BytesReader fstReader = fst.getBytesReader();
+    // TODO: jump to AC00, readNextRealArc to ceiling? (just be careful we don't add bugs)
+    for (int i = 0; i < rootCache.length; i++) {
+      if (fst.findTargetArc(0xAC00 + i, firstArc, arc, fstReader) != null) {
+        rootCache[i] = new FST.Arc<Long>().copyFrom(arc);
+      }
+    }
+    return rootCache;
+  }
+
+  public FST.Arc<Long> findTargetArc(int ch, FST.Arc<Long> follow, FST.Arc<Long> arc, boolean useCache, FST.BytesReader fstReader) throws IOException {
+    if (useCache && ch >= 0xAC00 && ch <= cacheCeiling) {
+      assert ch != FST.END_LABEL;
+      final Arc<Long> result = rootCache[ch - 0xAC00];
+      if (result == null) {
+        return null;
+      } else {
+        arc.copyFrom(result);
+        return arc;
+      }
+    } else {
+      return fst.findTargetArc(ch, follow, arc, fstReader);
+    }
+  }
+  
+  public Arc<Long> getFirstArc(FST.Arc<Long> arc) {
+    return fst.getFirstArc(arc);
+  }
+
+  public FST.BytesReader getBytesReader() {
+    return fst.getBytesReader();
+  }
+
+  /** @lucene.internal for testing only */
+  FST<Long> getInternalFST() {
+    return fst;
+  }
+}
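
A hedged sketch of how a caller typically walks this FST one character at a time, the same pattern UserDictionary.lookup() uses later in this patch. The surface string is arbitrary (whether it is in the shipped dictionary is an assumption), and the accumulated output is the id that would be handed to BinaryDictionary.lookupWordIds():

// Sketch only: prefix walk over the token dictionary FST with root-arc caching.
import java.io.IOException;

import org.apache.lucene.analysis.ko.dict.TokenInfoDictionary;
import org.apache.lucene.analysis.ko.dict.TokenInfoFST;
import org.apache.lucene.util.fst.FST;

public class FstWalkDemo {
  public static void main(String[] args) throws IOException {
    TokenInfoFST fst = TokenInfoDictionary.getInstance().getFST();
    FST.BytesReader reader = fst.getBytesReader();
    FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<>());

    String surface = "한국";  // arbitrary sample text
    int output = 0;
    for (int i = 0; i < surface.length(); i++) {
      // The Hangul root-arc cache is only consulted for the first character (i == 0).
      if (fst.findTargetArc(surface.charAt(i), arc, arc, i == 0, reader) == null) {
        System.out.println("no dictionary entry starts with this prefix");
        return;
      }
      output += arc.output.intValue();
      if (arc.isFinal()) {
        int wordGroupId = output + arc.nextFinalOutput.intValue();
        System.out.println("prefix of length " + (i + 1) + " -> lookupWordIds(" + wordGroupId + ", ...)");
      }
    }
  }
}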

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java
new file mode 100644
index 0000000..6d56b92
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.dict;
+
+
+import java.io.IOException;
+
+/**
+ * Dictionary for unknown-word handling.
+ */
+public final class UnknownDictionary extends BinaryDictionary {
+  private final CharacterDefinition characterDefinition = CharacterDefinition.getInstance();
+
+  private UnknownDictionary() throws IOException {
+    super();
+  }
+
+  public CharacterDefinition getCharacterDefinition() {
+    return characterDefinition;
+  }
+
+  public static UnknownDictionary getInstance() {
+    return SingletonHolder.INSTANCE;
+  }
+
+  @Override
+  public String getReading(int wordId) {
+    return null;
+  }
+
+  @Override
+  public Morpheme[] getMorphemes(int wordId, char[] surfaceForm, int off, int len) {
+    return null;
+  }
+
+  private static class SingletonHolder {
+    static final UnknownDictionary INSTANCE;
+
+    static {
+      try {
+        INSTANCE = new UnknownDictionary();
+      } catch (IOException ioe) {
+        throw new RuntimeException("Cannot load UnknownDictionary.", ioe);
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java
new file mode 100644
index 0000000..c5378a9
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.dict;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.lucene.analysis.ko.POS;
+import org.apache.lucene.util.IntsRefBuilder;
+import org.apache.lucene.util.fst.Builder;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+
+/**
+ * Class for building a User Dictionary.
+ * This class allows for adding custom nouns (세종) or compounds (세종시 세종 시).
+ */
+public final class UserDictionary implements Dictionary {
+  // text -> wordID
+  private final TokenInfoFST fst;
+
+  public static final int WORD_COST = -100000;
+
+  // NNG left
+  public static final short LEFT_ID = 1781;
+
+  // NNG right
+  public static final short RIGHT_ID = 3534;
+  // NNG right with hangul and a coda on the last char
+  public static final short RIGHT_ID_T = 3534;
+  // NNG right with hangul and no coda on the last char
+  public static final short RIGHT_ID_F = 3535;
+
+  // length, length... indexed by compound ID or null for simple noun
+  private final int segmentations[][];
+  private final short[] rightIds;
+
+  public static UserDictionary open(Reader reader) throws IOException {
+
+    BufferedReader br = new BufferedReader(reader);
+    String line = null;
+    List<String> entries = new ArrayList<>();
+
+    // text + optional segmentations
+    while ((line = br.readLine()) != null) {
+      // Remove comments
+      line = line.replaceAll("#.*$", "");
+
+      // Skip empty lines or comment lines
+      if (line.trim().length() == 0) {
+        continue;
+      }
+      entries.add(line);
+    }
+
+    if (entries.isEmpty()) {
+      return null;
+    } else {
+      return new UserDictionary(entries);
+    }
+  }
+
+  private UserDictionary(List<String> entries) throws IOException {
+    final CharacterDefinition charDef = CharacterDefinition.getInstance();
+    Collections.sort(entries,
+        Comparator.comparing(e -> e.split("\\s+")[0]));
+
+    PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
+    Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, fstOutput);
+    IntsRefBuilder scratch = new IntsRefBuilder();
+
+    String lastToken = null;
+    List<int[]> segmentations = new ArrayList<>(entries.size());
+    List<Short> rightIds = new ArrayList<>(entries.size());
+    long ord = 0;
+    for (String entry : entries) {
+      String[] splits = entry.split("\\s+");
+      String token = splits[0];
+      if (lastToken != null && token.equals(lastToken)) {
+        continue;
+      }
+      char lastChar = entry.charAt(entry.length()-1);
+      if (charDef.isHangul(lastChar)) {
+        if (charDef.hasCoda(lastChar)) {
+          rightIds.add(RIGHT_ID_T);
+        } else {
+          rightIds.add(RIGHT_ID_F);
+        }
+      } else {
+        rightIds.add(RIGHT_ID);
+      }
+
+      if (splits.length == 1) {
+        segmentations.add(null);
+      } else {
+        int[] length = new int[splits.length-1];
+        int offset = 0;
+        for (int i = 1; i < splits.length; i++) {
+          length[i-1] = splits[i].length();
+          offset += splits[i].length();
+        }
+        if (offset > token.length()) {
+          throw new IllegalArgumentException("Illegal user dictionary entry " + entry +
+              " - the segmentation is bigger than the surface form (" + token + ")");
+        }
+        segmentations.add(length);
+      }
+
+      // add mapping to FST
+      scratch.grow(token.length());
+      scratch.setLength(token.length());
+      for (int i = 0; i < token.length(); i++) {
+        scratch.setIntAt(i, (int) token.charAt(i));
+      }
+      fstBuilder.add(scratch.get(), ord);
+      lastToken = token;
+      ord ++;
+    }
+    this.fst = new TokenInfoFST(fstBuilder.finish());
+    this.segmentations = segmentations.toArray(new int[segmentations.size()][]);
+    this.rightIds = new short[rightIds.size()];
+    for (int i = 0; i < rightIds.size(); i++) {
+      this.rightIds[i] = rightIds.get(i);
+    }
+  }
+
+  public TokenInfoFST getFST() {
+    return fst;
+  }
+
+  @Override
+  public int getLeftId(int wordId) {
+    return LEFT_ID;
+  }
+  
+  @Override
+  public int getRightId(int wordId) {
+    return rightIds[wordId];
+  }
+  
+  @Override
+  public int getWordCost(int wordId) {
+    return WORD_COST;
+  }
+
+  @Override
+  public POS.Type getPOSType(int wordId) {
+    if (segmentations[wordId] == null) {
+      return POS.Type.MORPHEME;
+    } else {
+      return POS.Type.COMPOUND;
+    }
+  }
+
+  @Override
+  public POS.Tag getLeftPOS(int wordId) {
+    return POS.Tag.NNG;
+  }
+
+  @Override
+  public POS.Tag getRightPOS(int wordId) {
+    return POS.Tag.NNG;
+  }
+
+  @Override
+  public String getReading(int wordId) {
+    return null;
+  }
+
+  @Override
+  public Morpheme[] getMorphemes(int wordId, char[] surfaceForm, int off, int len) {
+    int[] segs = segmentations[wordId];
+    if (segs == null) {
+      return null;
+    }
+    int offset = 0;
+    Morpheme[] morphemes = new Morpheme[segs.length];
+    for (int i = 0; i < segs.length; i++) {
+      morphemes[i] = new Morpheme(POS.Tag.NNG, new String(surfaceForm, off+offset, segs[i]));
+      offset += segs[i];
+    }
+    return morphemes;
+  }
+
+  /**
+   * Lookup words in text
+   * @param chars text
+   * @param off offset into text
+   * @param len length of text
+   * @return list of wordIds found in the text
+   */
+  public List<Integer> lookup(char[] chars, int off, int len) throws IOException {
+    List<Integer> result = new ArrayList<>();
+    final FST.BytesReader fstReader = fst.getBytesReader();
+
+    FST.Arc<Long> arc = new FST.Arc<>();
+    int end = off + len;
+    for (int startOffset = off; startOffset < end; startOffset++) {
+      arc = fst.getFirstArc(arc);
+      int output = 0;
+      int remaining = end - startOffset;
+      for (int i = 0; i < remaining; i++) {
+        int ch = chars[startOffset+i];
+        if (fst.findTargetArc(ch, arc, arc, i == 0, fstReader) == null) {
+          break; // continue to next position
+        }
+        output += arc.output.intValue();
+        if (arc.isFinal()) {
+          final int finalOutput = output + arc.nextFinalOutput.intValue();
+          result.add(finalOutput);
+        }
+      }
+    }
+    return result;
+  }
+}
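
A hedged usage sketch of the class, built on the compound example already given in the javadoc ("세종" as a simple noun, "세종시" segmented as 세종 + 시); the entry contents and printed ids are illustrative, not canonical:

// Sketch only: build a user dictionary from two entries and look up a surface form.
import java.io.IOException;
import java.io.StringReader;
import java.util.List;

import org.apache.lucene.analysis.ko.dict.Dictionary.Morpheme;
import org.apache.lucene.analysis.ko.dict.UserDictionary;

public class UserDictionaryDemo {
  public static void main(String[] args) throws IOException {
    UserDictionary dict = UserDictionary.open(new StringReader("세종\n세종시 세종 시\n"));

    char[] text = "세종시".toCharArray();
    // Every dictionary word that starts at some offset of the text is returned.
    List<Integer> wordIds = dict.lookup(text, 0, text.length);
    for (int wordId : wordIds) {
      System.out.println("wordId=" + wordId + " type=" + dict.getPOSType(wordId));
      Morpheme[] morphemes = dict.getMorphemes(wordId, text, 0, text.length);
      if (morphemes != null) {  // null for simple nouns
        for (Morpheme m : morphemes) {
          System.out.println("  " + m.surfaceForm + "/" + m.posTag);
        }
      }
    }
  }
}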

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/package-info.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/package-info.java
new file mode 100644
index 0000000..f3fb377
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Korean dictionary implementation.
+ */
+package org.apache.lucene.analysis.ko.dict;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/package-info.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/package-info.java
new file mode 100644
index 0000000..200fd04
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Analyzer for Korean.
+ */
+package org.apache.lucene.analysis.ko;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttribute.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttribute.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttribute.java
new file mode 100644
index 0000000..1963c98
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttribute.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.tokenattributes;
+
+import org.apache.lucene.analysis.ko.POS.Tag;
+import org.apache.lucene.analysis.ko.POS.Type;
+import org.apache.lucene.analysis.ko.Token;
+import org.apache.lucene.analysis.ko.dict.Dictionary.Morpheme;
+import org.apache.lucene.util.Attribute;
+
+/**
+ * Part of Speech attributes for Korean.
+ * @lucene.experimental
+ */
+public interface PartOfSpeechAttribute extends Attribute {
+  /**
+   * Get the {@link Type} of the token.
+   */
+  Type getPOSType();
+
+  /**
+   * Get the left part of speech of the token.
+   */
+  Tag getLeftPOS();
+
+  /**
+   * Get the right part of speech of the token.
+   */
+  Tag getRightPOS();
+
+  /**
+   * Get the {@link Morpheme} decomposition of the token.
+   */
+  Morpheme[] getMorphemes();
+
+  /**
+   * Set the current token.
+   */
+  void setToken(Token token);
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttributeImpl.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttributeImpl.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttributeImpl.java
new file mode 100644
index 0000000..2e51689
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttributeImpl.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.tokenattributes;
+
+import org.apache.lucene.analysis.ko.POS.Type;
+import org.apache.lucene.analysis.ko.POS.Tag;
+import org.apache.lucene.analysis.ko.Token;
+import org.apache.lucene.analysis.ko.dict.Dictionary.Morpheme;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
+
+/**
+ * Part of Speech attributes for Korean.
+ * @lucene.experimental
+ */
+public class PartOfSpeechAttributeImpl extends AttributeImpl implements PartOfSpeechAttribute, Cloneable {
+  private Token token;
+
+  @Override
+  public Type getPOSType() {
+    return token == null ? null : token.getPOSType();
+  }
+
+  @Override
+  public Tag getLeftPOS() {
+    return token == null ? null : token.getLeftPOS();
+  }
+
+  @Override
+  public Tag getRightPOS() {
+    return token == null ? null : token.getRightPOS();
+  }
+
+  @Override
+  public Morpheme[] getMorphemes() {
+    return token == null ? null : token.getMorphemes();
+  }
+
+  @Override
+  public void setToken(Token token) {
+    this.token = token;
+  }
+
+  @Override
+  public void clear() {
+    token = null;
+  }
+
+  @Override
+  public void reflectWith(AttributeReflector reflector) {
+    reflector.reflect(PartOfSpeechAttribute.class, "posType", getPOSType().name());
+    Tag leftPOS = getLeftPOS();
+    reflector.reflect(PartOfSpeechAttribute.class, "leftPOS", leftPOS.name() + "(" + leftPOS.description() + ")");
+    Tag rightPOS = getRightPOS();
+    reflector.reflect(PartOfSpeechAttribute.class, "rightPOS", rightPOS.name() + "(" + rightPOS.description() + ")");
+    reflector.reflect(PartOfSpeechAttribute.class, "morphemes", displayMorphemes(getMorphemes()));
+  }
+
+  private String displayMorphemes(Morpheme[] morphemes) {
+    if (morphemes == null) {
+      return null;
+    }
+    StringBuilder builder = new StringBuilder();
+    for (Morpheme morpheme : morphemes) {
+      if (builder.length() > 0) {
+        builder.append("+");
+      }
+      builder.append(morpheme.surfaceForm + "/" + morpheme.posTag.name() + "(" + morpheme.posTag.description() + ")");
+    }
+    return builder.toString();
+  }
+
+  @Override
+  public void copyTo(AttributeImpl target) {
+    PartOfSpeechAttribute t = (PartOfSpeechAttribute) target;
+    t.setToken(token);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/ReadingAttribute.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/ReadingAttribute.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/ReadingAttribute.java
new file mode 100644
index 0000000..a5eb24f
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/ReadingAttribute.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.tokenattributes;
+
+import org.apache.lucene.analysis.ko.Token;
+import org.apache.lucene.util.Attribute;
+
+/**
+ * Attribute for Korean reading data
+ * <p>
+ * Note: in some cases this value may not be applicable, and will be null.
+ * @lucene.experimental
+ */
+public interface ReadingAttribute extends Attribute {
+  /**
+   * Get the reading of the token.
+   */
+  String getReading();
+
+  /**
+   * Set the current token.
+   */
+  void setToken(Token token);
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/ReadingAttributeImpl.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/ReadingAttributeImpl.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/ReadingAttributeImpl.java
new file mode 100644
index 0000000..a64a4b3
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/ReadingAttributeImpl.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.tokenattributes;
+
+import org.apache.lucene.analysis.ko.Token;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
+
+/**
+ * Attribute for Korean reading data
+ * @lucene.experimental
+ */
+public class ReadingAttributeImpl extends AttributeImpl implements ReadingAttribute, Cloneable {
+  private Token token;
+  
+  @Override
+  public String getReading() {
+    return token == null ? null : token.getReading();
+  }
+
+  @Override
+  public void setToken(Token token) {
+    this.token = token;
+  }
+
+  @Override
+  public void clear() {
+    token = null;
+  }
+
+  @Override
+  public void copyTo(AttributeImpl target) {
+    ReadingAttribute t = (ReadingAttribute) target;
+    t.setToken(token);
+  }
+  
+  @Override
+  public void reflectWith(AttributeReflector reflector) {
+    reflector.reflect(ReadingAttribute.class, "reading", getReading());
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/package-info.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/package-info.java
new file mode 100644
index 0000000..5a33cfb
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Additional Korean-specific Attributes for text analysis.
+ */
+package org.apache.lucene.analysis.ko.tokenattributes;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/CSVUtil.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/CSVUtil.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/CSVUtil.java
new file mode 100644
index 0000000..f911b55
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/CSVUtil.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko.util;
+
+import java.util.ArrayList;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Utility class for parsing CSV text
+ */
+public final class CSVUtil {
+  private static final char QUOTE = '"';
+  
+  private static final char COMMA = ',';
+  
+  private static final Pattern QUOTE_REPLACE_PATTERN = Pattern.compile("^\"([^\"]+)\"$");
+  
+  private static final String ESCAPED_QUOTE = "\"\"";
+  
+  private CSVUtil() {} // no instance!!!
+  
+  /**
+   * Parse CSV line
+   * @param line line containing csv-encoded data
+   * @return Array of values
+   */
+  public static String[] parse(String line) {
+    boolean insideQuote = false;
+    ArrayList<String> result = new ArrayList<>();
+    int quoteCount = 0;
+    StringBuilder sb = new StringBuilder();
+    for(int i = 0; i < line.length(); i++) {
+      char c = line.charAt(i);
+      
+      if(c == QUOTE) {
+        insideQuote = !insideQuote;
+        quoteCount++;
+      }
+      
+      if(c == COMMA && !insideQuote) {
+        String value = sb.toString();
+        value = unQuoteUnEscape(value);
+        result.add(value);
+        sb.setLength(0);
+        continue;
+      }
+      
+      sb.append(c);
+    }
+    
+    result.add(sb.toString());
+    
+    // Validate
+    if(quoteCount % 2 != 0) {
+      return new String[0];
+    }
+    
+    return result.toArray(new String[result.size()]);
+  }
+  
+  private static String unQuoteUnEscape(String original) {
+    String result = original;
+    
+    // Unquote
+    if (result.indexOf('\"') >= 0) {
+      Matcher m = QUOTE_REPLACE_PATTERN.matcher(original);
+      if(m.matches()) {
+        result = m.group(1);
+      }
+    
+      // Unescape
+      if (result.indexOf(ESCAPED_QUOTE) >= 0) {
+        result = result.replace(ESCAPED_QUOTE, "\"");
+      }
+    }
+    
+    return result;
+    
+  }
+}
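
A short usage sketch for parse(): a quoted field may contain the separator and is unquoted when the field is terminated by a following comma. The input line is invented:

// Sketch only: parsing a CSV line with a quoted field containing a comma.
import org.apache.lucene.analysis.ko.util.CSVUtil;

public class CsvParseDemo {
  public static void main(String[] args) {
    String[] fields = CSVUtil.parse("도시,NNG,\"세종,시\",Sejong City");
    for (String field : fields) {
      System.out.println(field);  // 도시 / NNG / 세종,시 / Sejong City
    }
  }
}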

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/package-info.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/package-info.java
new file mode 100644
index 0000000..8a25fdf
--- /dev/null
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Nori utility classes.
+ */
+package org.apache.lucene.analysis.ko.util;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/java/overview.html
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/overview.html b/lucene/analysis/nori/src/java/overview.html
new file mode 100644
index 0000000..8ed9efd
--- /dev/null
+++ b/lucene/analysis/nori/src/java/overview.html
@@ -0,0 +1,34 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+  <head>
+    <title>
+      Apache Lucene Nori Analyzer
+    </title>
+  </head>
+  <body>
+    Nori is a morphological analyzer for Korean text.
+    <p>
+      This module provides support for Korean text analysis, including features
+      such as part-of-speech tagging, part-of-speech filtering, decompounding and
+      hanja to hangul conversion.
+    </p>
+    <p>
+      For an introduction to Lucene's analysis API, see the {@link org.apache.lucene.analysis} package documentation.
+    </p>
+  </body>
+</html>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory b/lucene/analysis/nori/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
new file mode 100644
index 0000000..07a41c7
--- /dev/null
+++ b/lucene/analysis/nori/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
@@ -0,0 +1,16 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.apache.lucene.analysis.ko.KoreanReadingFormFilterFactory
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory b/lucene/analysis/nori/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
new file mode 100644
index 0000000..e541b0e
--- /dev/null
+++ b/lucene/analysis/nori/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
@@ -0,0 +1,16 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.apache.lucene.analysis.ko.KoreanTokenizerFactory

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/CharacterDefinition.dat
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/CharacterDefinition.dat b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/CharacterDefinition.dat
new file mode 100644
index 0000000..97b23bb
Binary files /dev/null and b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/CharacterDefinition.dat differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/ConnectionCosts.dat
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/ConnectionCosts.dat b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/ConnectionCosts.dat
new file mode 100644
index 0000000..7fad91e
Binary files /dev/null and b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/ConnectionCosts.dat differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary$buffer.dat
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary$buffer.dat b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary$buffer.dat
new file mode 100644
index 0000000..6958664
Binary files /dev/null and b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary$buffer.dat differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary$fst.dat
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary$fst.dat b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary$fst.dat
new file mode 100644
index 0000000..17b531f
Binary files /dev/null and b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary$fst.dat differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary$posDict.dat
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary$posDict.dat b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary$posDict.dat
new file mode 100644
index 0000000..c4db798
Binary files /dev/null and b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary$posDict.dat differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary$targetMap.dat
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary$targetMap.dat b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary$targetMap.dat
new file mode 100644
index 0000000..7c0823c
Binary files /dev/null and b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary$targetMap.dat differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/UnknownDictionary$buffer.dat
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/UnknownDictionary$buffer.dat b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/UnknownDictionary$buffer.dat
new file mode 100644
index 0000000..c9a18ee
Binary files /dev/null and b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/UnknownDictionary$buffer.dat differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/UnknownDictionary$posDict.dat
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/UnknownDictionary$posDict.dat b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/UnknownDictionary$posDict.dat
new file mode 100644
index 0000000..f1ceebd
Binary files /dev/null and b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/UnknownDictionary$posDict.dat differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/UnknownDictionary$targetMap.dat
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/UnknownDictionary$targetMap.dat b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/UnknownDictionary$targetMap.dat
new file mode 100644
index 0000000..abcdf90
Binary files /dev/null and b/lucene/analysis/nori/src/resources/org/apache/lucene/analysis/ko/dict/UnknownDictionary$targetMap.dat differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e851b89c/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/StringMockResourceLoader.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/StringMockResourceLoader.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/StringMockResourceLoader.java
new file mode 100644
index 0000000..cc1ee00
--- /dev/null
+++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/StringMockResourceLoader.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.ko;
+
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+
+import org.apache.lucene.analysis.util.ResourceLoader;
+
+/** Fake resource loader for tests: works if you want to fake reading a single file */
+class StringMockResourceLoader implements ResourceLoader {
+  String text;
+
+  public StringMockResourceLoader(String text) {
+    this.text = text;
+  }
+
+  @Override
+  public <T> Class<? extends T> findClass(String cname, Class<T> expectedType) {
+    try {
+      return Class.forName(cname).asSubclass(expectedType);
+    } catch (Exception e) {
+      throw new RuntimeException("Cannot load class: " + cname, e);
+    }
+  }
+
+  @Override
+  public <T> T newInstance(String cname, Class<T> expectedType) {
+    Class<? extends T> clazz = findClass(cname, expectedType);
+    try {
+      return clazz.newInstance();
+    } catch (Exception e) {
+      throw new RuntimeException("Cannot create instance: " + cname, e);
+    }
+  }
+
+  @Override
+  public InputStream openResource(String resource) throws IOException {
+    return new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8));
+  }
+}
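Because openResource ignores the requested name and always returns the constructor string, a test can feed arbitrary file-style configuration to a ResourceLoaderAware factory without touching disk. A minimal sketch of that behavior, within the same test package (the resource name and contents below are illustrative only):

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.nio.charset.StandardCharsets;
    import org.apache.lucene.analysis.util.ResourceLoader;

    ResourceLoader loader = new StringMockResourceLoader("first line\nsecond line");
    try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(loader.openResource("any-name.txt"), StandardCharsets.UTF_8))) {
      System.out.println(reader.readLine());   // prints "first line", whatever name was requested
    }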


[08/46] lucene-solr:jira/solr-11833: Fix lint error of unused imports in LeaderVoteWaitTimeoutTest

Posted by ab...@apache.org.
Fix lint error of unused imports in LeaderVoteWaitTimeoutTest


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/19b4483c
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/19b4483c
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/19b4483c

Branch: refs/heads/jira/solr-11833
Commit: 19b4483ca047d0e3ec61df526eac09b1602b6d57
Parents: 24f10c4
Author: Simon Willnauer <si...@apache.org>
Authored: Thu Apr 12 12:28:28 2018 +0200
Committer: Simon Willnauer <si...@apache.org>
Committed: Thu Apr 12 12:44:34 2018 +0200

----------------------------------------------------------------------
 .../src/test/org/apache/solr/cloud/LeaderVoteWaitTimeoutTest.java | 3 ---
 1 file changed, 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/19b4483c/solr/core/src/test/org/apache/solr/cloud/LeaderVoteWaitTimeoutTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderVoteWaitTimeoutTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderVoteWaitTimeoutTest.java
index 6d32c3a..34b9350 100644
--- a/solr/core/src/test/org/apache/solr/cloud/LeaderVoteWaitTimeoutTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/LeaderVoteWaitTimeoutTest.java
@@ -25,7 +25,6 @@ import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
 import org.apache.solr.JSONTestUtil;
 import org.apache.solr.client.solrj.SolrServerException;
@@ -34,8 +33,6 @@ import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.cloud.CollectionStatePredicate;
-import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.ZkCoreNodeProps;
 import org.apache.solr.common.util.NamedList;


[45/46] lucene-solr:jira/solr-11833: Merge branch 'master' into jira/solr-11833

Posted by ab...@apache.org.
Merge branch 'master' into jira/solr-11833


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/5bbe689d
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/5bbe689d
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/5bbe689d

Branch: refs/heads/jira/solr-11833
Commit: 5bbe689d6c75d74757932e74664f319a52695a1f
Parents: 76461f3 1e759bc
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Mon Apr 16 13:25:41 2018 +0200
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Mon Apr 16 13:25:41 2018 +0200

----------------------------------------------------------------------
 dev-tools/idea/.idea/modules.xml                |   1 +
 dev-tools/idea/lucene/analysis/nori/nori.iml    |  22 +
 .../maven/lucene/analysis/nori/pom.xml.template |  75 ++
 .../maven/lucene/analysis/pom.xml.template      |   1 +
 lucene/CHANGES.txt                              |  11 +
 lucene/NOTICE.txt                               |  12 +
 lucene/analysis/README.txt                      |   5 +
 lucene/analysis/build.xml                       |   7 +-
 lucene/analysis/nori/build.xml                  | 135 +++
 lucene/analysis/nori/ivy.xml                    |  33 +
 .../lucene/analysis/ko/DecompoundToken.java     |  69 ++
 .../lucene/analysis/ko/DictionaryToken.java     | 100 ++
 .../lucene/analysis/ko/GraphvizFormatter.java   | 180 ++++
 .../lucene/analysis/ko/KoreanAnalyzer.java      |  78 ++
 .../ko/KoreanPartOfSpeechStopFilter.java        |  85 ++
 .../ko/KoreanPartOfSpeechStopFilterFactory.java |  51 +
 .../analysis/ko/KoreanReadingFormFilter.java    |  51 +
 .../ko/KoreanReadingFormFilterFactory.java      |  42 +
 .../lucene/analysis/ko/KoreanTokenizer.java     | 957 +++++++++++++++++++
 .../analysis/ko/KoreanTokenizerFactory.java     |  89 ++
 .../java/org/apache/lucene/analysis/ko/POS.java | 304 ++++++
 .../org/apache/lucene/analysis/ko/Token.java    | 125 +++
 .../analysis/ko/dict/BinaryDictionary.java      | 239 +++++
 .../analysis/ko/dict/CharacterDefinition.java   | 136 +++
 .../analysis/ko/dict/ConnectionCosts.java       |  96 ++
 .../lucene/analysis/ko/dict/Dictionary.java     |  83 ++
 .../analysis/ko/dict/TokenInfoDictionary.java   |  77 ++
 .../lucene/analysis/ko/dict/TokenInfoFST.java   |  85 ++
 .../analysis/ko/dict/UnknownDictionary.java     |  61 ++
 .../lucene/analysis/ko/dict/UserDictionary.java | 235 +++++
 .../lucene/analysis/ko/dict/package-info.java   |  21 +
 .../apache/lucene/analysis/ko/package-info.java |  21 +
 .../tokenattributes/PartOfSpeechAttribute.java  |  54 ++
 .../PartOfSpeechAttributeImpl.java              |  92 ++
 .../ko/tokenattributes/ReadingAttribute.java    |  38 +
 .../tokenattributes/ReadingAttributeImpl.java   |  55 ++
 .../ko/tokenattributes/package-info.java        |  21 +
 .../apache/lucene/analysis/ko/util/CSVUtil.java |  95 ++
 .../lucene/analysis/ko/util/package-info.java   |  21 +
 lucene/analysis/nori/src/java/overview.html     |  34 +
 ...ache.lucene.analysis.util.TokenFilterFactory |  16 +
 ...apache.lucene.analysis.util.TokenizerFactory |  16 +
 .../analysis/ko/dict/CharacterDefinition.dat    | Bin 0 -> 65564 bytes
 .../lucene/analysis/ko/dict/ConnectionCosts.dat | Bin 0 -> 11178837 bytes
 .../ko/dict/TokenInfoDictionary$buffer.dat      | Bin 0 -> 7245625 bytes
 .../ko/dict/TokenInfoDictionary$fst.dat         | Bin 0 -> 5640925 bytes
 .../ko/dict/TokenInfoDictionary$posDict.dat     | Bin 0 -> 2712 bytes
 .../ko/dict/TokenInfoDictionary$targetMap.dat   | Bin 0 -> 811783 bytes
 .../ko/dict/UnknownDictionary$buffer.dat        | Bin 0 -> 101 bytes
 .../ko/dict/UnknownDictionary$posDict.dat       | Bin 0 -> 1823 bytes
 .../ko/dict/UnknownDictionary$targetMap.dat     | Bin 0 -> 36 bytes
 .../analysis/ko/StringMockResourceLoader.java   |  58 ++
 .../lucene/analysis/ko/TestKoreanAnalyzer.java  | 109 +++
 ...TestKoreanPartOfSpeechStopFilterFactory.java |  59 ++
 .../ko/TestKoreanReadingFormFilter.java         |  75 ++
 .../ko/TestKoreanReadingFormFilterFactory.java  |  51 +
 .../lucene/analysis/ko/TestKoreanTokenizer.java | 355 +++++++
 .../analysis/ko/TestKoreanTokenizerFactory.java | 113 +++
 .../ko/dict/TestTokenInfoDictionary.java        | 113 +++
 .../analysis/ko/dict/UserDictionaryTest.java    |  62 ++
 .../org/apache/lucene/analysis/ko/userdict.txt  |   5 +
 .../ko/util/BinaryDictionaryWriter.java         | 282 ++++++
 .../ko/util/CharacterDefinitionWriter.java      |  94 ++
 .../ko/util/ConnectionCostsBuilder.java         |  67 ++
 .../analysis/ko/util/ConnectionCostsWriter.java |  75 ++
 .../analysis/ko/util/DictionaryBuilder.java     |  67 ++
 .../ko/util/TokenInfoDictionaryBuilder.java     | 150 +++
 .../ko/util/TokenInfoDictionaryWriter.java      |  49 +
 .../ko/util/UnknownDictionaryBuilder.java       | 134 +++
 .../ko/util/UnknownDictionaryWriter.java        |  65 ++
 .../analysis/ko/dict/UnknownDictionaryTest.java |  74 ++
 .../apache/lucene/index/FilterMergePolicy.java  | 106 ++
 .../org/apache/lucene/index/MergePolicy.java    |   2 +-
 .../apache/lucene/index/MergePolicyWrapper.java | 100 --
 .../org/apache/lucene/index/NoMergePolicy.java  |  11 +-
 .../index/OneMergeWrappingMergePolicy.java      |   2 +-
 .../org/apache/lucene/index/PendingDeletes.java |  12 +-
 .../apache/lucene/index/PendingSoftDeletes.java |  11 +-
 .../apache/lucene/index/ReadersAndUpdates.java  |   3 +-
 .../lucene/index/UpgradeIndexMergePolicy.java   |   2 +-
 .../search/DisjunctionMatchesIterator.java      |   6 +-
 .../org/apache/lucene/search/LRUQueryCache.java |  22 +-
 .../java/org/apache/lucene/search/Matches.java  |  17 +-
 .../apache/lucene/search/MatchesIterator.java   |   2 +
 .../java/org/apache/lucene/search/Weight.java   |   1 +
 .../index/TestDemoParallelLeafReader.java       |   2 +-
 .../lucene/index/TestFilterMergePolicy.java     |  36 +
 .../apache/lucene/index/TestIndexSorting.java   |   1 +
 .../apache/lucene/index/TestIndexWriter.java    |   2 +-
 .../lucene/index/TestIndexWriterOnDiskFull.java |   2 +-
 .../lucene/index/TestMergePolicyWrapper.java    |  37 -
 .../apache/lucene/index/TestMultiFields.java    |   2 +-
 .../apache/lucene/index/TestPendingDeletes.java |   8 +-
 .../lucene/index/TestPendingSoftDeletes.java    |  23 +-
 .../apache/lucene/search/TestLRUQueryCache.java |  72 +-
 lucene/ivy-versions.properties                  |   1 +
 lucene/module-build.xml                         |  22 +
 .../spatial3d/geom/GeoComplexPolygon.java       | 381 ++++----
 .../org/apache/lucene/spatial3d/geom/Plane.java |   5 +-
 .../lucene/spatial3d/geom/SidedPlane.java       |  24 +
 .../apache/lucene/spatial3d/geom/Geo3DUtil.java |   7 +
 .../lucene/spatial3d/geom/GeoPolygonTest.java   |  20 +-
 .../spatial3d/geom/RandomGeoPolygonTest.java    |   1 -
 .../search/spell/LuceneLevenshteinDistance.java |   8 +
 .../apache/lucene/index/ForceMergePolicy.java   |   2 +-
 solr/CHANGES.txt                                |  27 +-
 .../handler/dataimport/DataImportHandler.java   |  37 +-
 .../org/apache/solr/cloud/ElectionContext.java  |   3 +-
 .../org/apache/solr/cloud/RecoveryStrategy.java |   2 +-
 .../solr/cloud/api/collections/RestoreCmd.java  |  26 +-
 .../solr/handler/CdcrReplicatorManager.java     |  29 +
 .../component/HttpShardHandlerFactory.java      | 165 +++-
 .../apache/solr/index/SortingMergePolicy.java   |   4 +-
 .../solr/response/GraphMLResponseWriter.java    |  14 +-
 .../solr/response/TextResponseWriter.java       |  27 +-
 .../solr/schema/LatLonPointSpatialField.java    |  15 +-
 .../spelling/ConjunctionSolrSpellChecker.java   |   3 +-
 .../DocBasedVersionConstraintsProcessor.java    | 512 ++++++++++
 ...BasedVersionConstraintsProcessorFactory.java | 441 ++-------
 .../solrconfig-externalversionconstraint.xml    |  32 +-
 .../org/apache/solr/TestDistributedSearch.java  |   1 +
 .../org/apache/solr/cloud/AddReplicaTest.java   |   1 +
 .../apache/solr/cloud/AliasIntegrationTest.java |   1 +
 .../cloud/ChaosMonkeyNothingIsSafeTest.java     |   1 +
 .../solr/cloud/CreateRoutedAliasTest.java       |   4 +
 .../apache/solr/cloud/DeleteReplicaTest.java    |   1 +
 .../solr/cloud/LeaderVoteWaitTimeoutTest.java   |  22 +-
 .../apache/solr/cloud/TestCloudRecovery.java    |   1 +
 .../TestLeaderInitiatedRecoveryThread.java      |   3 +
 .../solr/cloud/TestStressInPlaceUpdates.java    |   1 +
 .../AbstractCloudBackupRestoreTestCase.java     |  42 +-
 .../CollectionsAPIDistributedZkTest.java        |   1 -
 .../TestCollectionsAPIViaSolrCloudCluster.java  |   1 +
 .../autoscaling/ComputePlanActionTest.java      |   1 +
 .../cloud/autoscaling/IndexSizeTriggerTest.java |  24 +-
 .../cloud/autoscaling/ScheduledTriggerTest.java |   1 +
 .../sim/SimClusterStateProvider.java            |   9 +-
 .../autoscaling/sim/TestTriggerIntegration.java |   1 +
 .../solr/cloud/cdcr/CdcrBidirectionalTest.java  |  24 +-
 .../solr/cloud/cdcr/CdcrBootstrapTest.java      |   5 +-
 .../apache/solr/cloud/cdcr/CdcrTestsUtil.java   |  38 +
 .../admin/AutoscalingHistoryHandlerTest.java    |   3 +-
 .../component/TestHttpShardHandlerFactory.java  | 119 +++
 .../apache/solr/search/TestSolr4Spatial2.java   |  31 +-
 .../ConjunctionSolrSpellCheckerTest.java        |  35 +-
 .../update/TestDocBasedVersionConstraints.java  |  94 +-
 .../src/distributed-requests.adoc               |  40 +
 .../shards-and-indexing-data-in-solrcloud.adoc  |   4 +
 .../src/updating-parts-of-documents.adoc        |  10 +-
 .../org/apache/solr/client/solrj/io/Lang.java   |   1 +
 .../client/solrj/io/eval/ValueAtEvaluator.java  |  74 ++
 .../apache/solr/common/params/CommonParams.java |   1 +
 .../common/params/ModifiableSolrParams.java     |  20 +-
 .../solr/common/params/MultiMapSolrParams.java  |  11 +-
 .../apache/solr/common/params/ShardParams.java  |  12 +
 .../apache/solr/common/params/SolrParams.java   | 102 +-
 .../client/solrj/impl/CloudSolrClientTest.java  | 143 ++-
 .../apache/solr/client/solrj/io/TestLang.java   |   4 +-
 .../solrj/io/stream/MathExpressionTest.java     |  25 +
 .../cloud/TestCollectionStateWatchers.java      |   1 +
 .../solr/common/params/SolrParamTest.java       |  80 +-
 .../org/apache/solr/util/RandomMergePolicy.java |   4 +-
 162 files changed, 8249 insertions(+), 1024 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5bbe689d/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestTriggerIntegration.java
----------------------------------------------------------------------


[36/46] lucene-solr:jira/solr-11833: SOLR-12221: Add valueAt Stream Evaluator

Posted by ab...@apache.org.
SOLR-12221: Add valueAt Stream Evaluator


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/487daab6
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/487daab6
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/487daab6

Branch: refs/heads/jira/solr-11833
Commit: 487daab62978b9e331ffa59ce6be2b527e6b5526
Parents: 8462b13
Author: Joel Bernstein <jb...@apache.org>
Authored: Fri Apr 13 13:31:30 2018 -0400
Committer: Joel Bernstein <jb...@apache.org>
Committed: Fri Apr 13 13:31:51 2018 -0400

----------------------------------------------------------------------
 .../org/apache/solr/client/solrj/io/Lang.java   |  1 +
 .../client/solrj/io/eval/ValueAtEvaluator.java  | 74 ++++++++++++++++++++
 .../apache/solr/client/solrj/io/TestLang.java   |  4 +-
 .../solrj/io/stream/MathExpressionTest.java     | 25 +++++++
 4 files changed, 102 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/487daab6/solr/solrj/src/java/org/apache/solr/client/solrj/io/Lang.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/Lang.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/Lang.java
index 08ba211..fdbb875 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/Lang.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/Lang.java
@@ -233,6 +233,7 @@ public class Lang {
         .withFunctionName("linfnorm", LInfNormEvaluator.class)
         .withFunctionName("matrixMult", MatrixMultiplyEvaluator.class)
         .withFunctionName("bicubicSpline", BicubicSplineEvaluator.class)
+        .withFunctionName("valueAt", ValueAtEvaluator.class)
 
             // Boolean Stream Evaluators
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/487daab6/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ValueAtEvaluator.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ValueAtEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ValueAtEvaluator.java
new file mode 100644
index 0000000..6df3709
--- /dev/null
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ValueAtEvaluator.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.client.solrj.io.eval;
+
+import java.io.IOException;
+import java.util.List;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
+import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
+
+public class ValueAtEvaluator extends RecursiveObjectEvaluator implements ManyValueWorker {
+  protected static final long serialVersionUID = 1L;
+
+  public ValueAtEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
+    super(expression, factory);
+  }
+
+  @Override
+  public Object doWork(Object... values) throws IOException {
+    if(values[0] instanceof List) {
+
+      List<Number> c = (List<Number>) values[0];
+      int index = -1;
+      if(values.length == 2) {
+        index = ((Number)values[1]).intValue();
+        if(index >= c.size()) {
+          throw new IOException("Index out of bounds: "+index);
+        }
+      } else {
+        throw new IOException("The valueAt function expects an array and array index as parameters.");
+      }
+      return c.get(index);
+
+    } else if(values[0] instanceof Matrix) {
+
+      Matrix c = (Matrix) values[0];
+      double[][] data = c.getData();
+      int row = -1;
+      int col = -1;
+      if(values.length == 3) {
+        row = ((Number)values[1]).intValue();
+        if(row >= data.length) {
+          throw new IOException("Row index out of bounds: "+row);
+        }
+
+        col = ((Number)values[2]).intValue();
+        if(col >= data[0].length) {
+          throw new IOException("Column index out of bounds: "+col);
+        }
+
+      } else {
+        throw new IOException("The valueAt function expects a matrix and row and column indexes");
+      }
+      return data[row][col];
+    } else {
+      throw new IOException("The valueAt function expects a numeric array or matrix as the first parameter");
+    }
+
+  }
+}
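In the evaluator above, indexes are zero-based and bounds-checked: valueAt(array, i) returns the element at index i, and valueAt(matrix, row, col) returns the cell at that row and column, throwing an IOException when an index is out of range. For example, with b=array(1,2,3,4) and c=matrix(array(5,6,7), array(8,9,10)), valueAt(b, 3) evaluates to 4 and valueAt(c, 1, 0) evaluates to 8, as exercised by the new test further down in this message.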

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/487daab6/solr/solrj/src/test/org/apache/solr/client/solrj/io/TestLang.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/TestLang.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/TestLang.java
index 498cb2e..87f5c46 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/TestLang.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/TestLang.java
@@ -68,7 +68,7 @@ public class TestLang extends LuceneTestCase {
        TemporalEvaluatorEpoch.FUNCTION_NAME, TemporalEvaluatorWeek.FUNCTION_NAME, TemporalEvaluatorQuarter.FUNCTION_NAME,
        TemporalEvaluatorDayOfQuarter.FUNCTION_NAME, "abs", "add", "div", "mult", "sub", "log", "pow",
       "mod", "ceil", "floor", "sin", "asin", "sinh", "cos", "acos", "cosh", "tan", "atan", "tanh", "round", "sqrt",
-      "cbrt", "coalesce", "uuid", "if", "convert"};
+      "cbrt", "coalesce", "uuid", "if", "convert", "valueAt"};
 
   @Test
   public void testLang() {
@@ -85,7 +85,7 @@ public class TestLang extends LuceneTestCase {
       assertTrue("Testing function:"+func, registeredFunctions.containsKey(func));
     }
 
-    //Check that ech function that is registered is expected.
+    //Check that each function that is registered is expected.
     Set<String> keys = registeredFunctions.keySet();
     for(String key : keys) {
       assertTrue("Testing key:"+key, functions.contains(key));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/487daab6/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/MathExpressionTest.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/MathExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/MathExpressionTest.java
index abc1c21..07570a9 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/MathExpressionTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/MathExpressionTest.java
@@ -2354,6 +2354,31 @@ public class MathExpressionTest extends SolrCloudTestCase {
     }
   }
 
+
+  @Test
+  public void testValueAt() throws Exception {
+    String cexpr = "let(echo=true, " +
+        "               b=array(1,2,3,4), " +
+        "               c=matrix(array(5,6,7), " +
+        "                        array(8,9,10)), " +
+        "               d=valueAt(b, 3)," +
+        "               e=valueAt(c, 1, 0))";
+    ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
+    paramsLoc.set("expr", cexpr);
+    paramsLoc.set("qt", "/stream");
+    String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
+    TupleStream solrStream = new SolrStream(url, paramsLoc);
+    StreamContext context = new StreamContext();
+    solrStream.setStreamContext(context);
+    List<Tuple> tuples = getTuples(solrStream);
+    assertTrue(tuples.size() == 1);
+    Number value1 = (Number)tuples.get(0).get("d");
+    Number value2 = (Number)tuples.get(0).get("e");
+    assertEquals(value1.intValue(), 4);
+    assertEquals(value2.intValue(), 8);
+  }
+
+
   @Test
   public void testBetaDistribution() throws Exception {
     String cexpr = "let(a=sample(betaDistribution(1, 5), 50000), b=hist(a, 11), c=col(b, N))";


[13/46] lucene-solr:jira/solr-11833: LUCENE-8229: add lucene.experimental, plus small changes

Posted by ab...@apache.org.
LUCENE-8229: add lucene.experimental, plus small changes


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e6b65151
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e6b65151
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e6b65151

Branch: refs/heads/jira/solr-11833
Commit: e6b65151b6f4aec66376b3d4acc1a057167f62f6
Parents: 7a49371
Author: David Smiley <ds...@apache.org>
Authored: Thu Apr 12 10:59:58 2018 -0400
Committer: David Smiley <ds...@apache.org>
Committed: Thu Apr 12 10:59:58 2018 -0400

----------------------------------------------------------------------
 .../lucene/search/DisjunctionMatchesIterator.java  |  6 +-----
 .../src/java/org/apache/lucene/search/Matches.java | 17 +++++++----------
 .../org/apache/lucene/search/MatchesIterator.java  |  2 ++
 .../src/java/org/apache/lucene/search/Weight.java  |  1 +
 4 files changed, 11 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6b65151/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java
index 37770d2..a18b280 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMatchesIterator.java
@@ -93,11 +93,7 @@ final class DisjunctionMatchesIterator implements MatchesIterator {
         }
       }
     }
-    if (mis.size() == 0)
-      return null;
-    if (mis.size() == 1)
-      return mis.get(0);
-    return new DisjunctionMatchesIterator(mis);
+    return fromSubIterators(mis);
   }
 
   static MatchesIterator fromSubIterators(List<MatchesIterator> mis) throws IOException {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6b65151/lucene/core/src/java/org/apache/lucene/search/Matches.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/Matches.java b/lucene/core/src/java/org/apache/lucene/search/Matches.java
index 3670563..de9a692 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Matches.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Matches.java
@@ -20,12 +20,11 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Objects;
-import java.util.Set;
 import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
 
 /**
  * Reports the positions and optionally offsets of all matching terms in a query
@@ -34,6 +33,8 @@ import java.util.stream.Collectors;
  * To obtain a {@link MatchesIterator} for a particular field, call {@link #getMatches(String)}.
  * Note that you can call {@link #getMatches(String)} multiple times to retrieve new
  * iterators, but it is not thread-safe.
+ *
+ * @lucene.experimental
  */
 public interface Matches extends Iterable<String> {
 
@@ -73,16 +74,11 @@ public interface Matches extends Iterable<String> {
     if (sm.size() == 1) {
       return sm.get(0);
     }
-    Set<String> fields = new HashSet<>();
-    for (Matches m : sm) {
-      for (String field : m) {
-        fields.add(field);
-      }
-    }
+
     return new Matches() {
       @Override
       public MatchesIterator getMatches(String field) throws IOException {
-        List<MatchesIterator> subIterators = new ArrayList<>();
+        List<MatchesIterator> subIterators = new ArrayList<>(sm.size());
         for (Matches m : sm) {
           MatchesIterator it = m.getMatches(field);
           if (it != null) {
@@ -94,7 +90,8 @@ public interface Matches extends Iterable<String> {
 
       @Override
       public Iterator<String> iterator() {
-        return fields.iterator();
+        // for each sub-match, iterate its fields (it's an Iterable of the fields), and return the distinct set
+        return sm.stream().flatMap(m -> StreamSupport.stream(m.spliterator(), false)).distinct().iterator();
       }
     };
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6b65151/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java b/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java
index b874263..d695ea5 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MatchesIterator.java
@@ -32,6 +32,8 @@ import org.apache.lucene.util.BytesRef;
  * Matches are ordered by start position, and then by end position.  Match intervals may overlap.
  *
  * @see Weight#matches(LeafReaderContext, int)
+ *
+ * @lucene.experimental
  */
 public interface MatchesIterator {
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6b65151/lucene/core/src/java/org/apache/lucene/search/Weight.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java
index 3281b41..0e95aab 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Weight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java
@@ -78,6 +78,7 @@ public abstract class Weight implements SegmentCacheable {
    *
    * @param context the reader's context to create the {@link Matches} for
    * @param doc     the document's id relative to the given context's reader
+   * @lucene.experimental
    */
   public Matches matches(LeafReaderContext context, int doc) throws IOException {
     Scorer scorer = scorer(context);
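A minimal sketch of how a caller might consume this experimental API (not part of the commit; it assumes the caller already built a Weight for a rewritten query, and that MatchesIterator exposes next()/startPosition()/endPosition() as in the Matches API):

    import java.io.IOException;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.search.Matches;
    import org.apache.lucene.search.MatchesIterator;
    import org.apache.lucene.search.Weight;

    static void printMatches(Weight weight, LeafReaderContext context, int segmentLocalDoc) throws IOException {
      Matches matches = weight.matches(context, segmentLocalDoc);
      if (matches == null) {
        return;                                  // the document does not match the query
      }
      for (String field : matches) {             // Matches iterates the names of the matching fields
        MatchesIterator it = matches.getMatches(field);
        while (it.next()) {
          System.out.println(field + " [" + it.startPosition() + "-" + it.endPosition() + "]");
        }
      }
    }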


[32/46] lucene-solr:jira/solr-11833: SOLR-11913: SolrParams now implements Iterable<Map.Entry<String,String[]>> and has stream()

Posted by ab...@apache.org.
SOLR-11913: SolrParams now implements Iterable<Map.Entry<String,String[]>>
and has stream()


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/9a149ad7
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/9a149ad7
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/9a149ad7

Branch: refs/heads/jira/solr-11833
Commit: 9a149ad7e77837b7a99e84a4968fe43747ec30a0
Parents: f88a553
Author: David Smiley <ds...@apache.org>
Authored: Fri Apr 13 12:05:23 2018 -0400
Committer: David Smiley <ds...@apache.org>
Committed: Fri Apr 13 12:05:23 2018 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   3 +
 .../handler/dataimport/DataImportHandler.java   |  37 +++----
 .../solr/response/TextResponseWriter.java       |  27 ++---
 .../common/params/ModifiableSolrParams.java     |  20 ++--
 .../solr/common/params/MultiMapSolrParams.java  |  11 +-
 .../apache/solr/common/params/SolrParams.java   | 102 +++++++++++++++----
 .../solr/common/params/SolrParamTest.java       |  80 +++++++++++----
 7 files changed, 199 insertions(+), 81 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9a149ad7/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 3194e3d..999d2f6 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -96,6 +96,9 @@ New Features
 * SOLR-11336: DocBasedVersionConstraintsProcessorFactory is more extensible and now supports a list of versioned fields.
   (versionField config may now be a comma-delimited list). (Michael Braun via David Smiley)
 
+* SOLR-11913: SolrJ SolrParams now implements Iterable<Map.Entry<String, String[]>> and also has a stream() method
+  using it for convenience. (David Smiley, Tapan Vaishnav)
+
 Bug Fixes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9a149ad7/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java
----------------------------------------------------------------------
diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java
index 046901a..c9e997c 100644
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java
+++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImportHandler.java
@@ -16,7 +16,11 @@
  */
 package org.apache.solr.handler.dataimport;
 
-import static org.apache.solr.handler.dataimport.DataImporter.IMPORT_CMD;
+import java.lang.invoke.MethodHandles;
+import java.lang.reflect.Constructor;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
 
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
@@ -24,29 +28,26 @@ import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.MapSolrParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.ContentStream;
 import org.apache.solr.common.util.ContentStreamBase;
 import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.ContentStream;
 import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.SolrResourceLoader;
 import org.apache.solr.handler.RequestHandlerBase;
 import org.apache.solr.metrics.MetricsMap;
 import org.apache.solr.metrics.SolrMetricManager;
-import org.apache.solr.response.RawResponseWriter;
 import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.RawResponseWriter;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.update.processor.UpdateRequestProcessor;
 import org.apache.solr.update.processor.UpdateRequestProcessorChain;
 import org.apache.solr.util.plugin.SolrCoreAware;
-
-import java.util.*;
-import java.lang.invoke.MethodHandles;
-import java.lang.reflect.Constructor;
-
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.solr.handler.dataimport.DataImporter.IMPORT_CMD;
+
 /**
  * <p>
  * Solr Request Handler for data import from databases and REST data sources.
@@ -210,18 +211,18 @@ public class DataImportHandler extends RequestHandlerBase implements
     rsp.add("statusMessages", importer.getStatusMessages());
   }
 
+  /** The value is converted to a String or {@code List<String>} if multi-valued. */
   private Map<String, Object> getParamsMap(SolrParams params) {
-    Iterator<String> names = params.getParameterNamesIterator();
     Map<String, Object> result = new HashMap<>();
-    while (names.hasNext()) {
-      String s = names.next();
-      String[] val = params.getParams(s);
-      if (val == null || val.length < 1)
-        continue;
-      if (val.length == 1)
-        result.put(s, val[0]);
-      else
-        result.put(s, Arrays.asList(val));
+    for (Map.Entry<String, String[]> pair : params){
+        String s = pair.getKey();
+        String[] val = pair.getValue();
+        if (val == null || val.length < 1)
+          continue;
+        if (val.length == 1)
+          result.put(s, val[0]);
+        else
+          result.put(s, Arrays.asList(val));
     }
     return result;
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9a149ad7/solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/response/TextResponseWriter.java b/solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
index fb61400..46c037d 100644
--- a/solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
+++ b/solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
@@ -36,10 +36,10 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.solr.common.EnumFieldValue;
 import org.apache.solr.common.IteratorWriter;
 import org.apache.solr.common.MapSerializable;
+import org.apache.solr.common.MapWriter;
 import org.apache.solr.common.PushWriter;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
-import org.apache.solr.common.MapWriter;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.util.Base64;
 import org.apache.solr.common.util.NamedList;
@@ -127,8 +127,9 @@ public abstract class TextResponseWriter implements PushWriter {
 
     // if there get to be enough types, perhaps hashing on the type
     // to get a handler might be faster (but types must be exact to do that...)
+    //    (see a patch on LUCENE-3041 for inspiration)
 
-    // go in order of most common to least common
+    // go in order of most common to least common, however some of the more general types like Map belong towards the end
     if (val == null) {
       writeNull(name);
     } else if (val instanceof String) {
@@ -170,20 +171,25 @@ public abstract class TextResponseWriter implements PushWriter {
     // restricts the fields to write...?
     } else if (val instanceof SolrDocumentList) {
       writeSolrDocumentList(name, (SolrDocumentList)val, returnFields);
-    } else if (val instanceof Map) {
-      writeMap(name, (Map)val, false, true);
     } else if (val instanceof NamedList) {
       writeNamedList(name, (NamedList)val);
     } else if (val instanceof Path) {
       writeStr(name, ((Path) val).toAbsolutePath().toString(), true);
     } else if (val instanceof IteratorWriter) {
       writeIterator((IteratorWriter) val);
-    } else if (val instanceof Iterable) {
+    } else if (val instanceof MapWriter) {
+      writeMap((MapWriter) val);
+    } else if (val instanceof MapSerializable) {
+      //todo find a better way to reuse the map more efficiently
+      writeMap(name, ((MapSerializable) val).toMap(new LinkedHashMap<>()), false, true);
+    } else if (val instanceof Map) {
+      writeMap(name, (Map)val, false, true);
+    } else if (val instanceof Iterator) { // very generic; keep towards the end
+      writeArray(name, (Iterator) val);
+    } else if (val instanceof Iterable) { // very generic; keep towards the end
       writeArray(name,((Iterable)val).iterator());
     } else if (val instanceof Object[]) {
       writeArray(name,(Object[])val);
-    } else if (val instanceof Iterator) {
-      writeArray(name, (Iterator) val);
     } else if (val instanceof byte[]) {
       byte[] arr = (byte[])val;
       writeByteArr(name, arr, 0, arr.length);
@@ -194,13 +200,8 @@ public abstract class TextResponseWriter implements PushWriter {
       writeStr(name, val.toString(), true);
     } else if (val instanceof WriteableValue) {
       ((WriteableValue)val).write(name, this);
-    } else if (val instanceof MapWriter) {
-      writeMap((MapWriter) val);
-    } else if (val instanceof MapSerializable) {
-      //todo find a better way to reuse the map more efficiently
-      writeMap(name, ((MapSerializable) val).toMap(new LinkedHashMap<>()), false, true);
     } else {
-      // default... for debugging only
+      // default... for debugging only.  Would be nice to "assert false" ?
       writeStr(name, val.getClass().getName() + ':' + val.toString(), true);
     }
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9a149ad7/solr/solrj/src/java/org/apache/solr/common/params/ModifiableSolrParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/ModifiableSolrParams.java b/solr/solrj/src/java/org/apache/solr/common/params/ModifiableSolrParams.java
index 5a28e40..22dd436 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/ModifiableSolrParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/ModifiableSolrParams.java
@@ -23,7 +23,7 @@ import java.util.Set;
 
 
 /**
- * This class is similar to MultiMapSolrParams except you can edit the 
+ * This class is similar to {@link MultiMapSolrParams} except you can edit the
  * parameters after it is initialized.  It has helper functions to set/add
  * integer and boolean param values.
  * 
@@ -132,12 +132,13 @@ public class ModifiableSolrParams extends SolrParams
     return this;
   }
 
-  public void add(SolrParams params)
-  {
-    Iterator<String> names = params.getParameterNamesIterator();
-    while (names.hasNext()) {
-      String name = names.next();
-      set(name, params.getParams(name));
+  /**
+   * Add all of the params provided in the parameter to <em>this</em> params. Any current value(s) for the same
+   * key will be overridden.
+   */
+  public void add(SolrParams params) {
+    for (Map.Entry<String, String[]> pair: params) {
+      set(pair.getKey(), pair.getValue());
     }
   }
   
@@ -205,4 +206,9 @@ public class ModifiableSolrParams extends SolrParams
   public String[] getParams(String param) {
     return vals.get( param );
   }
+
+  @Override
+  public Iterator<Map.Entry<String, String[]>> iterator() {
+    return vals.entrySet().iterator();
+  }
 }
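Note the semantics spelled out in the new javadoc above: add(SolrParams) delegates to set(), so existing values for a key are replaced rather than appended to. For example, if this instance already holds foo=[x] and the argument holds foo=[a, b], the result is foo=[a, b], not foo=[x, a, b]; SolrParams.wrapAppended (exercised in the tests below) is the way to combine values from two parameter sets instead.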

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9a149ad7/solr/solrj/src/java/org/apache/solr/common/params/MultiMapSolrParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/MultiMapSolrParams.java b/solr/solrj/src/java/org/apache/solr/common/params/MultiMapSolrParams.java
index ed6a2e7..8a5416b 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/MultiMapSolrParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/MultiMapSolrParams.java
@@ -74,6 +74,11 @@ public class MultiMapSolrParams extends SolrParams {
     return map.keySet().iterator();
   }
 
+  @Override
+  public Iterator<Map.Entry<String, String[]>> iterator() {
+    return map.entrySet().iterator();
+  }
+
   public Map<String,String[]> getMap() { return map; }
 
   /** Returns a MultiMap view of the SolrParams as efficiently as possible.  The returned map may or may not be a backing implementation. */
@@ -97,10 +102,8 @@ public class MultiMapSolrParams extends SolrParams {
       return map;
     } else {
       Map<String,String[]> map = new HashMap<>();
-      Iterator<String> iterator = params.getParameterNamesIterator();
-      while (iterator.hasNext()) {
-        String name = iterator.next();
-        map.put(name, params.getParams(name));
+      for (Map.Entry<String, String[]> pair : params) {
+        map.put(pair.getKey(), pair.getValue());
       }
       return map;
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9a149ad7/solr/solrj/src/java/org/apache/solr/common/params/SolrParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/SolrParams.java b/solr/solrj/src/java/org/apache/solr/common/params/SolrParams.java
index 130ca3e..b78c652 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/SolrParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/SolrParams.java
@@ -29,6 +29,8 @@ import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
 
 import org.apache.solr.client.solrj.util.ClientUtils;
 import org.apache.solr.common.MapWriter;
@@ -37,19 +39,28 @@ import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.common.util.StrUtils;
 
-/**  SolrParams hold request parameters.
- *
- *
+/**
+ * SolrParams is designed to hold parameters to Solr, often from the request coming into Solr.
+ * It's basically a MultiMap of String keys to one or more String values.  Neither keys nor values may be null.
+ * Unlike a general Map/MultiMap, the size is unknown without iterating over each parameter name.
  */
-public abstract class SolrParams implements Serializable, MapWriter {
+public abstract class SolrParams implements Serializable, MapWriter, Iterable<Map.Entry<String, String[]>> {
 
-  /** returns the String value of a param, or null if not set */
+  /**
+   * Returns the first String value of a param, or null if not set.
+   * To get all, call {@link #getParams(String)} instead.
+   */
   public abstract String get(String param);
 
-  /** returns an array of the String values of a param, or null if none */
+  /** returns an array of the String values of a param, or null if no mapping for the param exists. */
   public abstract String[] getParams(String param);
 
-  /** returns an Iterator over the parameter names */
+  /**
+   * Returns an Iterator over the parameter names.
+   * If you were to call a getter for this parameter, you should get a non-null value.
+   * Since you probably want the value, consider using Java 5 for-each style instead for convenience since a SolrParams
+   * implements {@link Iterable}.
+   */
   public abstract Iterator<String> getParameterNamesIterator();
 
   /** returns the value of the param, or def if not set */
@@ -58,6 +69,64 @@ public abstract class SolrParams implements Serializable, MapWriter {
     return val==null ? def : val;
   }
 
+  @Override
+  public void writeMap(EntryWriter ew) throws IOException {
+    //TODO don't call toNamedList; more efficiently implement here
+    //note: multiple values, if present, are a String[] under 1 key
+    toNamedList().forEach((k, v) -> {
+      if (v == null || "".equals(v)) return;
+      try {
+        ew.put(k, v);
+      } catch (IOException e) {
+        throw new RuntimeException("Error serializing", e);
+      }
+    });
+  }
+
+  /** Returns an Iterator of {@code Map.Entry} providing a multi-map view.  Treat it as read-only. */
+  @Override
+  public Iterator<Map.Entry<String, String[]>> iterator() {
+    Iterator<String> it = getParameterNamesIterator();
+    return new Iterator<Map.Entry<String, String[]>>() {
+      @Override
+      public boolean hasNext() {
+        return it.hasNext();
+      }
+      @Override
+      public Map.Entry<String, String[]> next() {
+        String key = it.next();
+        return new Map.Entry<String, String[]>() {
+          @Override
+          public String getKey() {
+            return key;
+          }
+
+          @Override
+          public String[] getValue() {
+            return getParams(key);
+          }
+
+          @Override
+          public String[] setValue(String[] newValue) {
+            throw new UnsupportedOperationException("read-only");
+          }
+
+          @Override
+          public String toString() {
+            return getKey() + "=" + Arrays.toString(getValue());
+          }
+        };
+      }
+    };
+  }
+
+  /** A {@link Stream} view over {@link #iterator()} -- for convenience.  Treat it as read-only. */
+  public Stream<Map.Entry<String, String[]>> stream() {
+    return StreamSupport.stream(spliterator(), false);
+  }
+  // Do we add Map.forEach equivalent too?  But it eager-fetches the value, and Iterable<Map.Entry> allows the user
+  //  to only get the value when needed.
+
   /** returns a RequiredSolrParams wrapping this */
   public RequiredSolrParams required()
   {
@@ -439,7 +508,10 @@ public abstract class SolrParams implements Serializable, MapWriter {
     return toSolrParams(nl);
   }
 
-  /** Convert this to a NamedList */
+  /**
+   * Convert this to a NamedList of unique keys with either String or String[] values depending on
+   * how many values there are for the parameter.
+   */
   public NamedList<Object> toNamedList() {
     final SimpleOrderedMap<Object> result = new SimpleOrderedMap<>();
 
@@ -549,18 +621,4 @@ public abstract class SolrParams implements Serializable, MapWriter {
     }
   }
 
-  @Override
-  public void writeMap(EntryWriter ew) throws IOException {
-    toNamedList().forEach((k, v) -> {
-      if (v == null || "".equals(v)) return;
-      try {
-        ew.put(k, v);
-      } catch (IOException e) {
-        throw new RuntimeException("Error serializing", e);
-      }
-    });
-
-  }
-
-
 }
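A minimal sketch of the two new views from the caller's side (parameter names and values below are illustrative, not from the commit; treat both views as read-only, per the javadoc above):

    import java.util.Arrays;
    import java.util.Map;
    import org.apache.solr.common.params.ModifiableSolrParams;

    ModifiableSolrParams params = new ModifiableSolrParams();
    params.add("q", "*:*");
    params.add("fl", "id", "score");

    // for-each over the multi-map view: one Map.Entry<String,String[]> per parameter name
    for (Map.Entry<String, String[]> entry : params) {
      System.out.println(entry.getKey() + "=" + Arrays.toString(entry.getValue()));
    }

    // or the Stream view built on the same iterator
    long names = params.stream().map(Map.Entry::getKey).count();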

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9a149ad7/solr/solrj/src/test/org/apache/solr/common/params/SolrParamTest.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/common/params/SolrParamTest.java b/solr/solrj/src/test/org/apache/solr/common/params/SolrParamTest.java
index 80f9036..a2cb9bd 100644
--- a/solr/solrj/src/test/org/apache/solr/common/params/SolrParamTest.java
+++ b/solr/solrj/src/test/org/apache/solr/common/params/SolrParamTest.java
@@ -40,15 +40,15 @@ public class SolrParamTest extends LuceneTestCase {
     assertIterSize("aaa: foo", 1, aaa);
     assertIterSize("required aaa: foo", 1, aaa.required());
 
-    assertEquals(new String[] { "a1", "a2" }, aaa.getParams("foo"));
+    assertArrayEquals(new String[] { "a1", "a2" }, aaa.getParams("foo"));
 
     aaa.add("yak", "a3");
 
     assertIterSize("aaa: foo & yak", 2, aaa);
     assertIterSize("required aaa: foo & yak", 2, aaa.required());
 
-    assertEquals(new String[] { "a1", "a2" }, aaa.getParams("foo"));
-    assertEquals(new String[] { "a3" }, aaa.getParams("yak"));
+    assertArrayEquals(new String[] { "a1", "a2" }, aaa.getParams("foo"));
+    assertArrayEquals(new String[] { "a3" }, aaa.getParams("yak"));
 
     ModifiableSolrParams bbb = new ModifiableSolrParams();
     bbb.add("foo", "b1");
@@ -58,26 +58,72 @@ public class SolrParamTest extends LuceneTestCase {
     assertIterSize("bbb: foo & zot", 2, bbb);
     assertIterSize("required bbb: foo & zot", 2, bbb.required());
 
-    assertEquals(new String[] { "b1", "b2" }, bbb.getParams("foo"));
-    assertEquals(new String[] { "b3" }, bbb.getParams("zot"));
+    assertArrayEquals(new String[] { "b1", "b2" }, bbb.getParams("foo"));
+    assertArrayEquals(new String[] { "b3" }, bbb.getParams("zot"));
 
     SolrParams def = SolrParams.wrapDefaults(aaa, bbb);
 
     assertIterSize("def: aaa + bbb", 3, def);
     assertIterSize("required def: aaa + bbb", 3, def.required());
 
-    assertEquals(new String[] { "a1", "a2" }, def.getParams("foo"));
-    assertEquals(new String[] { "a3" }, def.getParams("yak"));
-    assertEquals(new String[] { "b3" }, def.getParams("zot"));
+    assertArrayEquals(new String[] { "a1", "a2" }, def.getParams("foo"));
+    assertArrayEquals(new String[] { "a3" }, def.getParams("yak"));
+    assertArrayEquals(new String[] { "b3" }, def.getParams("zot"));
 
     SolrParams append = SolrParams.wrapAppended(aaa, bbb);
 
     assertIterSize("append: aaa + bbb", 3, append);
     assertIterSize("required appended: aaa + bbb", 3, append.required());
 
-    assertEquals(new String[] { "a1", "a2", "b1", "b2", }, append.getParams("foo"));
-    assertEquals(new String[] { "a3" }, append.getParams("yak"));
-    assertEquals(new String[] { "b3" }, append.getParams("zot"));
+    assertArrayEquals(new String[] { "a1", "a2", "b1", "b2", }, append.getParams("foo"));
+    assertArrayEquals(new String[] { "a3" }, append.getParams("yak"));
+    assertArrayEquals(new String[] { "b3" }, append.getParams("zot"));
+
+  }
+
+  public void testMapEntryIterators() {
+
+    ModifiableSolrParams aaa = new ModifiableSolrParams();
+    aaa.add("foo", "a1");
+    aaa.add("foo", "a2");
+
+    assertIterSize("aaa: foo", 1, aaa);
+    assertIterSize("required aaa: foo", 1, aaa.required());
+
+    assertArrayEquals(new String[] { "a1", "a2" }, aaa.getParams("foo"));
+
+    aaa.add("yak", "a3");
+
+    assertIterSize("aaa: foo & yak", 2, aaa);
+    assertIterSize("required aaa: foo & yak", 2, aaa.required());
+
+    assertArrayEquals(new String[] { "a1", "a2" }, aaa.getParams("foo"));
+    assertArrayEquals(new String[] { "a3" }, aaa.getParams("yak"));
+
+    ModifiableSolrParams bbb = new ModifiableSolrParams();
+    bbb.add("foo", "b1");
+    bbb.add("foo", "b2");
+    bbb.add("zot", "b3");
+
+    assertIterSize("bbb: foo & zot", 2, bbb);
+    assertIterSize("required bbb: foo & zot", 2, bbb.required());
+
+    assertArrayEquals(new String[] { "b1", "b2" }, bbb.getParams("foo"));
+    assertArrayEquals(new String[] { "b3" }, bbb.getParams("zot"));
+
+    SolrParams append = SolrParams.wrapAppended(aaa, bbb);
+
+    assertIterSize("append: aaa + bbb", 3, append);
+    assertIterSize("required appended: aaa + bbb", 3, append.required());
+
+    assertArrayEquals(new String[] { "a1", "a2", "b1", "b2", }, append.getParams("foo"));
+    assertArrayEquals(new String[] { "a3" }, append.getParams("yak"));
+    assertArrayEquals(new String[] { "b3" }, append.getParams("zot"));
+
+    Iterator<Map.Entry<String, String[]>> it = append.iterator();
+    assertArrayEquals(new String[] { "a1", "a2", "b1", "b2", }, it.next().getValue());
+    assertArrayEquals(new String[] { "a3" }, it.next().getValue());
+    assertArrayEquals(new String[] { "b3" }, it.next().getValue());
 
   }
 
@@ -124,16 +170,16 @@ public class SolrParamTest extends LuceneTestCase {
     bbb.add("zot", "b3");
     
     SolrParams def = SolrParams.wrapDefaults(aaa, bbb);
-    assertEquals(new String[] { "a1", "a2" }, def.getParams("foo"));
-    assertEquals(new String[] { "a3" }, def.getParams("yak"));
-    assertEquals(new String[] { "b3" }, def.getParams("zot"));
+    assertArrayEquals(new String[] { "a1", "a2" }, def.getParams("foo"));
+    assertArrayEquals(new String[] { "a3" }, def.getParams("yak"));
+    assertArrayEquals(new String[] { "b3" }, def.getParams("zot"));
 
     ModifiableSolrParams combined = new ModifiableSolrParams();
     combined.add(def);
 
-    assertEquals(new String[] { "a1", "a2" }, combined.getParams("foo"));
-    assertEquals(new String[] { "a3" }, combined.getParams("yak"));
-    assertEquals(new String[] { "b3" }, combined.getParams("zot"));
+    assertArrayEquals(new String[] { "a1", "a2" }, combined.getParams("foo"));
+    assertArrayEquals(new String[] { "a3" }, combined.getParams("yak"));
+    assertArrayEquals(new String[] { "b3" }, combined.getParams("zot"));
 
   }
 


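A note on the assertion change above: assertArrayEquals compares array contents element by element and reports the index of the first mismatch, which is why the test switches to it for the getParams() results (the array overload of assertEquals is the deprecated form). A minimal stand-alone illustration of the underlying difference, with class and variable names invented for the example:

    import java.util.Arrays;

    public class ArrayEqualitySketch {
      public static void main(String[] args) {
        String[] expected = { "a1", "a2" };
        String[] actual   = { "a1", "a2" };

        // Object equality on arrays is reference identity, so two distinct
        // arrays with identical contents are not "equal".
        System.out.println(expected.equals(actual));          // false

        // Element-by-element comparison is what assertArrayEquals performs.
        System.out.println(Arrays.equals(expected, actual));  // true
      }
    }
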
[43/46] lucene-solr:jira/solr-11833: LUCENE-8254: LRUQueryCache can leak locks

Posted by ab...@apache.org.
LUCENE-8254: LRUQueryCache can leak locks


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/19fa91db
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/19fa91db
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/19fa91db

Branch: refs/heads/jira/solr-11833
Commit: 19fa91dbfbca990df460a9e709b7f83c27bc27cd
Parents: 3028f3e
Author: Alan Woodward <ro...@apache.org>
Authored: Mon Apr 16 10:57:12 2018 +0100
Committer: Alan Woodward <ro...@apache.org>
Committed: Mon Apr 16 10:57:12 2018 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  4 ++
 .../org/apache/lucene/search/LRUQueryCache.java | 22 +++----
 .../apache/lucene/search/TestLRUQueryCache.java | 63 +++++++++++++++++++-
 3 files changed, 77 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/19fa91db/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e559099..1b790e4 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -162,6 +162,10 @@ Bug Fixes
   index file names for updated doc values fields (Simon Willnauer,
   Michael McCandless, Nhat Nguyen)
 
+* LUCENE-8254: LRUQueryCache could cause IndexReader to hang on close, when
+  shared with another reader with no CacheHelper (Alan Woodward, Simon Willnauer,
+  Adrien Grand)
+
 Other
 
 * LUCENE-8228: removed obsolete IndexDeletionPolicy clone() requirements from

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/19fa91db/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
index 9391afd..27480e0 100644
--- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
+++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
@@ -727,16 +727,17 @@ public class LRUQueryCache implements QueryCache, Accountable {
         return in.scorerSupplier(context);
       }
 
-      // If the lock is already busy, prefer using the uncached version rather than waiting
-      if (lock.tryLock() == false) {
-        return in.scorerSupplier(context);
-      }
-
       final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper();
       if (cacheHelper == null) {
         // this reader has no cache helper
         return in.scorerSupplier(context);
       }
+
+      // If the lock is already busy, prefer using the uncached version rather than waiting
+      if (lock.tryLock() == false) {
+        return in.scorerSupplier(context);
+      }
+
       DocIdSet docIdSet;
       try {
         docIdSet = get(in.getQuery(), context, cacheHelper);
@@ -807,16 +808,17 @@ public class LRUQueryCache implements QueryCache, Accountable {
         return in.bulkScorer(context);
       }
 
-      // If the lock is already busy, prefer using the uncached version rather than waiting
-      if (lock.tryLock() == false) {
-        return in.bulkScorer(context);
-      }
-
       final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper();
       if (cacheHelper == null) {
         // this reader has no cacheHelper
         return in.bulkScorer(context);
       }
+
+      // If the lock is already busy, prefer using the uncached version rather than waiting
+      if (lock.tryLock() == false) {
+        return in.bulkScorer(context);
+      }
+
       DocIdSet docIdSet;
       try {
         docIdSet = get(in.getQuery(), context, cacheHelper);

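The reordering above is the essence of the fix: in the old order, a reader with no cache helper could return early after tryLock() had already succeeded, leaving the lock held. Checking the cache helper first means every early return happens before the lock is taken, and the only code that runs while holding the lock is the try/finally that releases it. A minimal, self-contained sketch of the corrected shape (plain Java, made-up names, not the actual LRUQueryCache code):

    import java.util.concurrent.locks.ReentrantLock;

    public class LockOrderSketch {
      private final ReentrantLock lock = new ReentrantLock();

      // Fixed shape: every "fall back to the uncached path" check happens
      // before the lock is acquired.
      String lookup(String key, boolean cacheable) {
        if (!cacheable) {
          return uncached(key);      // nothing acquired, nothing to release
        }
        if (!lock.tryLock()) {
          return uncached(key);      // cache busy, prefer the uncached path
        }
        try {
          return "cached:" + key;    // stand-in for the real cache lookup
        } finally {
          lock.unlock();             // always runs once tryLock() succeeded
        }
      }

      private String uncached(String key) {
        return "uncached:" + key;
      }

      public static void main(String[] args) {
        System.out.println(new LockOrderSketch().lookup("q", true));
      }
    }
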
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/19fa91db/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
index 74066ca..f4240e1 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
@@ -62,7 +62,6 @@ import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.RamUsageTester;
 import org.apache.lucene.util.TestUtil;
-import org.junit.Test;
 
 public class TestLRUQueryCache extends LuceneTestCase {
 
@@ -1479,7 +1478,6 @@ public class TestLRUQueryCache extends LuceneTestCase {
     }
   }
 
-  @Test
   public void testDocValuesUpdatesDontBreakCache() throws IOException {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
@@ -1545,4 +1543,65 @@ public class TestLRUQueryCache extends LuceneTestCase {
     dir.close();
 
   }
+
+  public void testBulkScorerLocking() throws Exception {
+
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
+    IndexWriter w = new IndexWriter(dir, iwc);
+
+    final int numDocs = atLeast(10);
+    Document emptyDoc = new Document();
+    for (int d = 0; d < numDocs; ++d) {
+      for (int i = random().nextInt(5000); i >= 0; --i) {
+        w.addDocument(emptyDoc);
+      }
+      Document doc = new Document();
+      for (String value : Arrays.asList("foo", "bar", "baz")) {
+        if (random().nextBoolean()) {
+          doc.add(new StringField("field", value, Store.NO));
+        }
+      }
+    }
+    for (int i = TestUtil.nextInt(random(), 3000, 5000); i >= 0; --i) {
+      w.addDocument(emptyDoc);
+    }
+    if (random().nextBoolean()) {
+      w.forceMerge(1);
+    }
+
+    DirectoryReader reader = DirectoryReader.open(w);
+    DirectoryReader noCacheReader = new DummyDirectoryReader(reader);
+
+    LRUQueryCache cache = new LRUQueryCache(1, 100000, context -> true);
+    IndexSearcher searcher = new AssertingIndexSearcher(random(), reader);
+    searcher.setQueryCache(cache);
+    searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
+
+    Query query = new ConstantScoreQuery(new BooleanQuery.Builder()
+        .add(new BoostQuery(new TermQuery(new Term("field", "foo")), 3), Occur.SHOULD)
+        .add(new BoostQuery(new TermQuery(new Term("field", "bar")), 3), Occur.SHOULD)
+        .add(new BoostQuery(new TermQuery(new Term("field", "baz")), 3), Occur.SHOULD)
+        .build());
+
+    searcher.search(query, 1);
+
+    IndexSearcher noCacheHelperSearcher = new AssertingIndexSearcher(random(), noCacheReader);
+    noCacheHelperSearcher.setQueryCache(cache);
+    noCacheHelperSearcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
+    noCacheHelperSearcher.search(query, 1);
+
+    Thread t = new Thread(() -> {
+      try {
+        noCacheReader.close();
+        w.close();
+        dir.close();
+      }
+      catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    });
+    t.start();
+    t.join();
+  }
 }

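DummyDirectoryReader is a helper defined elsewhere in this test class, not part of this hunk; the point is a reader whose cache helpers return null, so the cache has to take the uncached path. A rough sketch of what such a wrapper can look like, assuming the usual FilterDirectoryReader and FilterLeafReader extension points (illustrative only, not the actual helper):

    import java.io.IOException;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.FilterDirectoryReader;
    import org.apache.lucene.index.FilterLeafReader;
    import org.apache.lucene.index.LeafReader;

    // A reader wrapper whose core/reader cache helpers are null, so
    // LRUQueryCache cannot key a cache entry against it.
    class NoCacheHelperDirectoryReader extends FilterDirectoryReader {

      NoCacheHelperDirectoryReader(DirectoryReader in) throws IOException {
        super(in, new SubReaderWrapper() {
          @Override
          public LeafReader wrap(LeafReader reader) {
            return new FilterLeafReader(reader) {
              @Override
              public CacheHelper getCoreCacheHelper() {
                return null;   // no per-core cache key
              }
              @Override
              public CacheHelper getReaderCacheHelper() {
                return null;   // no per-reader cache key
              }
            };
          }
        });
      }

      @Override
      protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
        return new NoCacheHelperDirectoryReader(in);
      }

      @Override
      public CacheHelper getReaderCacheHelper() {
        return null;
      }
    }
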

[14/46] lucene-solr:jira/solr-11833: SOLR-12065: A successful restore collection should mark the shard state as active and not buffering

Posted by ab...@apache.org.
SOLR-12065: A successful restore collection should mark the shard state as active and not buffering


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7a57ca8c
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7a57ca8c
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7a57ca8c

Branch: refs/heads/jira/solr-11833
Commit: 7a57ca8c0d10ceb23cad6fe9bc3538314ce6b6ce
Parents: e6b6515
Author: Varun Thacker <va...@apache.org>
Authored: Thu Apr 12 08:18:35 2018 -0700
Committer: Varun Thacker <va...@apache.org>
Committed: Thu Apr 12 08:20:28 2018 -0700

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  5 ++-
 .../solr/cloud/api/collections/RestoreCmd.java  | 26 +++++++++---
 .../AbstractCloudBackupRestoreTestCase.java     | 42 ++++++++++++++------
 3 files changed, 55 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7a57ca8c/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index da6642a..0c2feaf 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -143,6 +143,9 @@ Bug Fixes
 * SOLR-12190: Need to properly escape output in GraphMLResponseWriter. (yonik)
 
 * SOLR-12214: Leader may skip publish itself as ACTIVE when its last published state is DOWN (Cao Manh Dat)
+
+* SOLR-12065: A successful restore collection should mark the shard state as active and not buffering
+  (Rohit, Varun Thacker)
  
 Optimizations
 ----------------------
@@ -1991,7 +1994,7 @@ Bug Fixes
 
 * SOLR-11024: ParallelStream should set the StreamContext when constructing SolrStreams (Joel Bernstein)
 
-* SOLR-10908: CloudSolrStream.toExpression incorrectly handles fq clauses (Rohit Singh via Erick Erickson)
+* SOLR-10908: CloudSolrStream.toExpression incorrectly handles fq clauses (Rohit via Erick Erickson)
 
 * SOLR-11177: CoreContainer.load needs to send lazily loaded core descriptors to the proper list rather than send
   them all to the transient lists. (Erick Erickson)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7a57ca8c/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java
index 1823fe3..ca7c2d6 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java
@@ -258,7 +258,6 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
           propMap.put(ASYNC, asyncId);
         }
         ocmh.addPropertyParams(message, propMap);
-
         ocmh.addReplica(clusterState, new ZkNodeProps(propMap), new NamedList(), null);
       }
 
@@ -272,11 +271,31 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
         params.set(NAME, "snapshot." + slice.getName());
         params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.toASCIIString());
         params.set(CoreAdminParams.BACKUP_REPOSITORY, repo);
-
         ocmh.sliceCmd(clusterState, params, null, slice, shardHandler, asyncId, requestMap);
       }
       ocmh.processResponses(new NamedList(), shardHandler, true, "Could not restore core", asyncId, requestMap);
 
+
+      for (Slice s: restoreCollection.getSlices()) {
+        for (Replica r : s.getReplicas()) {
+          String nodeName = r.getNodeName();
+          String coreNodeName = r.getCoreName();
+          Replica.State stateRep  = r.getState();
+
+          log.debug("Calling REQUESTAPPLYUPDATES on: nodeName={}, coreNodeName={}, state={}"
+              , nodeName, coreNodeName, stateRep.name());
+
+          ModifiableSolrParams params = new ModifiableSolrParams();
+          params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTAPPLYUPDATES.toString());
+          params.set(CoreAdminParams.NAME, coreNodeName);
+
+          ocmh.sendShardRequest(nodeName, params, shardHandler, asyncId, requestMap);
+        }
+
+        ocmh.processResponses(new NamedList(), shardHandler, true, "REQUESTAPPLYUPDATES calls did not succeed", asyncId, requestMap);
+
+      }
+
       //Mark all shards in ACTIVE STATE
       {
         HashMap<String, Object> propMap = new HashMap<>();
@@ -288,9 +307,6 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
         inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
       }
 
-      //refresh the location copy of collection state
-      restoreCollection = zkStateReader.getClusterState().getCollection(restoreCollectionName);
-
       if (totalReplicasPerShard > 1) {
         log.info("Adding replicas to restored collection={}", restoreCollection);
         for (Slice slice : restoreCollection.getSlices()) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7a57ca8c/solr/core/src/test/org/apache/solr/cloud/api/collections/AbstractCloudBackupRestoreTestCase.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/AbstractCloudBackupRestoreTestCase.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/AbstractCloudBackupRestoreTestCase.java
index 058814c..46e6faf 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/AbstractCloudBackupRestoreTestCase.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/AbstractCloudBackupRestoreTestCase.java
@@ -93,12 +93,12 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
     replFactor = TestUtil.nextInt(random(), 1, 2);
     numTlogReplicas = TestUtil.nextInt(random(), 0, 1);
     numPullReplicas = TestUtil.nextInt(random(), 0, 1);
-    
+
     CollectionAdminRequest.Create create = isImplicit ?
-      // NOTE: use shard list with same # of shards as NUM_SHARDS; we assume this later
-      CollectionAdminRequest.createCollectionWithImplicitRouter(getCollectionName(), "conf1", "shard1,shard2", replFactor, numTlogReplicas, numPullReplicas) :
-      CollectionAdminRequest.createCollection(getCollectionName(), "conf1", NUM_SHARDS, replFactor, numTlogReplicas, numPullReplicas);
-    
+        // NOTE: use shard list with same # of shards as NUM_SHARDS; we assume this later
+        CollectionAdminRequest.createCollectionWithImplicitRouter(getCollectionName(), "conf1", "shard1,shard2", replFactor, numTlogReplicas, numPullReplicas) :
+        CollectionAdminRequest.createCollection(getCollectionName(), "conf1", NUM_SHARDS, replFactor, numTlogReplicas, numPullReplicas);
+
     if (NUM_SHARDS * (replFactor + numTlogReplicas + numPullReplicas) > cluster.getJettySolrRunners().size() || random().nextBoolean()) {
       create.setMaxShardsPerNode((int)Math.ceil(NUM_SHARDS * (replFactor + numTlogReplicas + numPullReplicas) / cluster.getJettySolrRunners().size()));//just to assert it survives the restoration
       if (doSplitShardOperation) {
@@ -122,7 +122,7 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
     CloudSolrClient solrClient = cluster.getSolrClient();
     create.process(solrClient);
 
-    indexDocs(getCollectionName());
+    indexDocs(getCollectionName(), false);
 
     if (doSplitShardOperation) {
       // shard split the first shard
@@ -197,23 +197,29 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
     return cluster.getSolrClient().getZkStateReader().getClusterState().getCollection(collectionName).getActiveSlices().size();
   }
 
-  private void indexDocs(String collectionName) throws Exception {
+  private int indexDocs(String collectionName, boolean useUUID) throws Exception {
     Random random = new Random(docsSeed);// use a constant seed for the whole test run so that we can easily re-index.
     int numDocs = random.nextInt(100);
     if (numDocs == 0) {
       log.info("Indexing ZERO test docs");
-      return;
+      return 0;
     }
+
     List<SolrInputDocument> docs = new ArrayList<>(numDocs);
     for (int i=0; i<numDocs; i++) {
       SolrInputDocument doc = new SolrInputDocument();
-      doc.addField("id", i);
+      doc.addField("id", ((useUUID == true) ? java.util.UUID.randomUUID().toString() : i));
       doc.addField("shard_s", "shard" + (1 + random.nextInt(NUM_SHARDS))); // for implicit router
       docs.add(doc);
     }
+
     CloudSolrClient client = cluster.getSolrClient();
-    client.add(collectionName, docs);// batch
+    client.add(collectionName, docs); //batch
     client.commit(collectionName);
+
+    log.info("Indexed {} docs to collection: {}", numDocs, collectionName);
+
+    return numDocs;
   }
 
   private void testBackupAndRestore(String collectionName) throws Exception {
@@ -257,7 +263,7 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
         // may need to increase maxShardsPerNode (e.g. if it was shard split, then now we need more)
         restore.setMaxShardsPerNode((int)Math.ceil(backupCollection.getReplicas().size()/cluster.getJettySolrRunners().size()));
       }
-      
+
 
       if (rarely()) { // Try with createNodeSet configuration
         int nodeSetSize = cluster.getJettySolrRunners().size() / 2;
@@ -298,7 +304,7 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
     //Re-index same docs (should be identical docs given same random seed) and test we have the same result.  Helps
     //  test we reconstituted the hash ranges / doc router.
     if (!(restoreCollection.getRouter() instanceof ImplicitDocRouter) && random().nextBoolean()) {
-      indexDocs(restoreCollectionName);
+      indexDocs(restoreCollectionName, false);
       assertEquals(origShardToDocCount, getShardToDocCountMap(client, restoreCollection));
     }
 
@@ -327,6 +333,18 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa
 
     assertEquals("Restore collection should use stateFormat=2", 2, restoreCollection.getStateFormat());
 
+    //SOLR-12065: Add more docs after restore is complete to see if they are getting added fine
+    //explicitly querying the leaders. If we use CloudSolrClient there is no guarantee that we'll hit a nrtReplica
+    {
+      Map<String, Integer> restoredCollectionPerShardCount =  getShardToDocCountMap(client, restoreCollection);
+      long restoredCollectionDocCount = restoredCollectionPerShardCount.values().stream().mapToInt(Number::intValue).sum();
+      int numberNewDocsIndexed = indexDocs(restoreCollectionName, true);
+      Map<String, Integer> restoredCollectionPerShardCountAfterIndexing = getShardToDocCountMap(client, restoreCollection);
+      int restoredCollectionFinalDocCount = restoredCollectionPerShardCountAfterIndexing.values().stream().mapToInt(Number::intValue).sum();
+
+      log.info("Original doc count in restored collection:" + restoredCollectionDocCount + ", number of newly added documents to the restored collection: " + numberNewDocsIndexed + ", after indexing: " + restoredCollectionFinalDocCount);
+      assertEquals((restoredCollectionDocCount + numberNewDocsIndexed), restoredCollectionFinalDocCount);
+    }
 
     // assert added core properties:
     // DWS: did via manual inspection.

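The new block asserts that documents added after the restore actually land in the restored collection. The earlier re-index with the same sequential ids leaves the per-shard counts unchanged because Solr overwrites on the uniqueKey, while the UUID ids used here do not collide, so every add must grow the count. A tiny stand-alone illustration of that id semantics (a plain Java map standing in for a collection keyed by id):

    import java.util.HashMap;
    import java.util.Map;
    import java.util.UUID;

    public class IdOverwriteSketch {
      public static void main(String[] args) {
        Map<String, String> byId = new HashMap<>();   // stand-in for the "id" uniqueKey

        // Re-adding the same ids overwrites in place: the count stays the same.
        for (int i = 0; i < 5; i++) byId.put(Integer.toString(i), "doc" + i);
        for (int i = 0; i < 5; i++) byId.put(Integer.toString(i), "doc" + i);
        System.out.println(byId.size());   // 5

        // Random UUID ids do not collide with the existing ones, so every add
        // grows the count, which is what the post-restore assertion relies on.
        for (int i = 0; i < 5; i++) byId.put(UUID.randomUUID().toString(), "new" + i);
        System.out.println(byId.size());   // 10
      }
    }
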

[30/46] lucene-solr:jira/solr-11833: LUCENE-8251: Add AwaitsFix for the tests that this issue covers.

Posted by ab...@apache.org.
LUCENE-8251: Add AwaitsFix for the tests that this issue covers.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/79350bd4
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/79350bd4
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/79350bd4

Branch: refs/heads/jira/solr-11833
Commit: 79350bd4dd31a67c05f08e6484561c38494d4773
Parents: f41e7c4
Author: Karl Wright <Da...@gmail.com>
Authored: Fri Apr 13 09:39:31 2018 -0400
Committer: Karl Wright <Da...@gmail.com>
Committed: Fri Apr 13 09:39:31 2018 -0400

----------------------------------------------------------------------
 .../spatial3d/geom/GeoComplexPolygon.java       | 30 +++++++++++++-------
 .../lucene/spatial3d/geom/GeoPolygonTest.java   | 17 +++++------
 .../spatial3d/geom/RandomGeoPolygonTest.java    |  2 +-
 3 files changed, 29 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/79350bd4/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
index c4e2c93..b6b6577 100644
--- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
+++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
@@ -1146,12 +1146,17 @@ class GeoComplexPolygon extends GeoBasePolygon {
       this.intersectionPoint = intersectionPoint;
       
       //System.out.println("Intersection point = "+intersectionPoint);
-        
+      //System.out.println("TestPoint plane: "+testPoint+" -> "+intersectionPoint);
+      //System.out.println("Travel plane: ["+thePointX+","+thePointY+","+thePointZ+"] -> "+intersectionPoint);
+      
       assert travelPlane.evaluateIsZero(intersectionPoint) : "intersection point must be on travel plane";
       assert testPointPlane.evaluateIsZero(intersectionPoint) : "intersection point must be on test point plane";
-        
+      
+      //System.out.println("Test point distance to intersection point: "+intersectionPoint.linearDistance(testPoint));
+      //System.out.println("Check point distance to intersection point: "+intersectionPoint.linearDistance(thePointX, thePointY, thePointZ));
+
       assert !testPoint.isNumericallyIdentical(intersectionPoint) : "test point is the same as intersection point";
-      assert !intersectionPoint.isNumericallyIdentical(thePointX, thePointY, thePointZ) : "check point is same is intersection point";
+      assert !intersectionPoint.isNumericallyIdentical(thePointX, thePointY, thePointZ) : "check point is same as intersection point";
 
       this.testPointCutoffPlane = new SidedPlane(intersectionPoint, testPointPlane, testPoint);
       this.checkPointCutoffPlane = new SidedPlane(intersectionPoint, travelPlane, thePointX, thePointY, thePointZ);
@@ -1324,7 +1329,7 @@ class GeoComplexPolygon extends GeoBasePolygon {
       System.out.println("");
       System.out.println("Considering edge "+(edge.startPoint)+" -> "+(edge.endPoint));
       */
-      
+
       // Some edges are going to be given to us even when there's no real intersection, so do that as a sanity check, first.
       final GeoPoint[] travelCrossings = travelPlane.findIntersections(planetModel, edge.plane, checkPointCutoffPlane, checkPointOtherCutoffPlane, edge.startPlane, edge.endPlane);
       if (travelCrossings != null && travelCrossings.length == 0) {
@@ -1442,7 +1447,10 @@ class GeoComplexPolygon extends GeoBasePolygon {
 
   }
   
-    
+  /** This is the amount we go, roughly, in both directions, to find adjoining points to test.  If we go too far,
+    * we might miss a transition, but if we go too little, we might not see it either due to numerical issues.
+    */
+  private final static double DELTA_DISTANCE = Vector.MINIMUM_RESOLUTION;// * 0.5;
   
   /** Given a point on the plane and the ellipsoid, this method looks for a pair of adjoining points on either side of the plane, which are
    * about MINIMUM_RESOLUTION away from the given point.  This only works for planes which go through the center of the world.
@@ -1451,12 +1459,12 @@ class GeoComplexPolygon extends GeoBasePolygon {
     // Compute a normalized perpendicular vector
     final Vector perpendicular = new Vector(plane, pointOnPlane);
     // Compute two new points along this vector from the original
-    final GeoPoint pointA = planetModel.createSurfacePoint(pointOnPlane.x + perpendicular.x * Vector.MINIMUM_RESOLUTION,
-      pointOnPlane.y + perpendicular.y * Vector.MINIMUM_RESOLUTION,
-      pointOnPlane.z + perpendicular.z * Vector.MINIMUM_RESOLUTION);
-    final GeoPoint pointB = planetModel.createSurfacePoint(pointOnPlane.x - perpendicular.x * Vector.MINIMUM_RESOLUTION,
-      pointOnPlane.y - perpendicular.y * Vector.MINIMUM_RESOLUTION,
-      pointOnPlane.z - perpendicular.z * Vector.MINIMUM_RESOLUTION);
+    final GeoPoint pointA = planetModel.createSurfacePoint(pointOnPlane.x + perpendicular.x * DELTA_DISTANCE,
+      pointOnPlane.y + perpendicular.y * DELTA_DISTANCE,
+      pointOnPlane.z + perpendicular.z * DELTA_DISTANCE);
+    final GeoPoint pointB = planetModel.createSurfacePoint(pointOnPlane.x - perpendicular.x * DELTA_DISTANCE,
+      pointOnPlane.y - perpendicular.y * DELTA_DISTANCE,
+      pointOnPlane.z - perpendicular.z * DELTA_DISTANCE);
     //System.out.println("Distance: "+computeSquaredDistance(rval[0], pointOnPlane)+" and "+computeSquaredDistance(rval[1], pointOnPlane));
     return new GeoPoint[]{pointA, pointB};
   }

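findAdjoiningPoints above nudges the given point by a small delta in both directions along a vector perpendicular to the plane, then projects each candidate back onto the planet surface with createSurfacePoint. A self-contained sketch of just the offset step with plain 3-D coordinates (names and the delta value are made up; the surface projection is omitted):

    public class AdjoiningPointsSketch {

      static final double DELTA = 1e-12;   // stand-in for DELTA_DISTANCE

      // Offset a point by +/- DELTA along a unit-length perpendicular vector.
      static double[][] adjoiningPoints(double[] point, double[] perpendicular) {
        double[] a = new double[3];
        double[] b = new double[3];
        for (int i = 0; i < 3; i++) {
          a[i] = point[i] + perpendicular[i] * DELTA;
          b[i] = point[i] - perpendicular[i] * DELTA;
        }
        return new double[][] { a, b };
      }

      public static void main(String[] args) {
        double[][] pair = adjoiningPoints(new double[] {1, 0, 0}, new double[] {0, 0, 1});
        System.out.println(pair[0][2] + " / " + pair[1][2]);   // 1.0E-12 / -1.0E-12
      }
    }
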
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/79350bd4/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
index e720cb2..86f5694 100755
--- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
+++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
@@ -1570,20 +1570,21 @@ shape:
   }
   
   @Test
+  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8251")
   public void testLUCENE8251() {
     //POLYGON((135.63207358036593 -51.43541696593334,113.00782694696038 -58.984559858566556,0.0 -3.68E-321,-66.33598777585381 -7.382056816201731,135.63207358036593 -51.43541696593334))
     final List<GeoPoint> points = new ArrayList<>();
-    points.add(new GeoPoint(PlanetModel.SPHERE, Geo3DUtil.fromDegrees(-51.43541696593334), Geo3DUtil.fromDegrees(135.63207358036593)));
-    points.add(new GeoPoint(PlanetModel.SPHERE, Geo3DUtil.fromDegrees(-58.984559858566556), Geo3DUtil.fromDegrees(113.00782694696038)));
-    points.add(new GeoPoint(PlanetModel.SPHERE, Geo3DUtil.fromDegrees(-3.68E-321), Geo3DUtil.fromDegrees(0.0)));
-    points.add(new GeoPoint(PlanetModel.SPHERE, Geo3DUtil.fromDegrees(-7.382056816201731), Geo3DUtil.fromDegrees(-66.33598777585381)));
-    points.add(new GeoPoint(PlanetModel.SPHERE, Geo3DUtil.fromDegrees(-51.43541696593334), Geo3DUtil.fromDegrees(135.63207358036593)));
+    points.add(new GeoPoint(PlanetModel.WGS84, Geo3DUtil.fromDegrees(-51.43541696593334), Geo3DUtil.fromDegrees(135.63207358036593)));
+    points.add(new GeoPoint(PlanetModel.WGS84, Geo3DUtil.fromDegrees(-58.984559858566556), Geo3DUtil.fromDegrees(113.00782694696038)));
+    points.add(new GeoPoint(PlanetModel.WGS84, Geo3DUtil.fromDegrees(-3.68E-321), Geo3DUtil.fromDegrees(0.0)));
+    points.add(new GeoPoint(PlanetModel.WGS84, Geo3DUtil.fromDegrees(-7.382056816201731), Geo3DUtil.fromDegrees(-66.33598777585381)));
+    points.add(new GeoPoint(PlanetModel.WGS84, Geo3DUtil.fromDegrees(-51.43541696593334), Geo3DUtil.fromDegrees(135.63207358036593)));
     final GeoPolygonFactory.PolygonDescription description = new GeoPolygonFactory.PolygonDescription(points);
-    final GeoPolygon polygon = GeoPolygonFactory.makeGeoPolygon(PlanetModel.SPHERE, description);
-    final GeoPolygon largePolygon = GeoPolygonFactory.makeLargeGeoPolygon(PlanetModel.SPHERE, Collections.singletonList(description));
+    final GeoPolygon polygon = GeoPolygonFactory.makeGeoPolygon(PlanetModel.WGS84, description);
+    final GeoPolygon largePolygon = GeoPolygonFactory.makeLargeGeoPolygon(PlanetModel.WGS84, Collections.singletonList(description));
 
     //POINT(0.005183505059185348 1.98E-321)
-    final GeoPoint point = new GeoPoint(PlanetModel.SPHERE, Geo3DUtil.fromDegrees(1.98E-321), Geo3DUtil.fromDegrees(0.005183505059185348));
+    final GeoPoint point = new GeoPoint(PlanetModel.WGS84, Geo3DUtil.fromDegrees(1.98E-321), Geo3DUtil.fromDegrees(0.005183505059185348));
     assertTrue(polygon.isWithin(point) == largePolygon.isWithin(point));
   }
   

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/79350bd4/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
index 6c5e890..b6364e0 100644
--- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
+++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
@@ -92,7 +92,7 @@ public class RandomGeoPolygonTest extends RandomGeo3dShapeGenerator {
    * biased doubles.
    */
   @Test
-  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8251")
+  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8251")
   @Repeat(iterations = 10)
   public void testComparePolygons() {
     final PlanetModel planetModel = randomPlanetModel();


[12/46] lucene-solr:jira/solr-11833: Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr

Posted by ab...@apache.org.
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7a493710
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7a493710
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7a493710

Branch: refs/heads/jira/solr-11833
Commit: 7a4937106c0a551a958a726c6c6fbf9145c4637d
Parents: 832e897 19b4483
Author: Karl Wright <Da...@gmail.com>
Authored: Thu Apr 12 07:09:30 2018 -0400
Committer: Karl Wright <Da...@gmail.com>
Committed: Thu Apr 12 07:09:30 2018 -0400

----------------------------------------------------------------------
 .../org/apache/lucene/index/PendingDeletes.java |  8 ++++++-
 .../apache/lucene/index/PendingSoftDeletes.java |  3 ++-
 .../apache/lucene/index/ReadersAndUpdates.java  |  1 +
 .../apache/lucene/index/TestPendingDeletes.java |  8 +++----
 .../lucene/index/TestPendingSoftDeletes.java    | 23 +++++++++++++++-----
 .../apache/lucene/search/TestLRUQueryCache.java |  9 ++++----
 .../solr/cloud/LeaderVoteWaitTimeoutTest.java   |  3 ---
 7 files changed, 35 insertions(+), 20 deletions(-)
----------------------------------------------------------------------



[38/46] lucene-solr:jira/solr-11833: LUCENE-8248: remove deprecated MergePolicyWrapper for 8.x

Posted by ab...@apache.org.
LUCENE-8248: remove deprecated MergePolicyWrapper for 8.x


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e2e89d1a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e2e89d1a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e2e89d1a

Branch: refs/heads/jira/solr-11833
Commit: e2e89d1a608c1143db77c1e7175672e4fa19ea3e
Parents: 7c0387a
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Apr 13 16:32:42 2018 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Apr 13 16:32:42 2018 -0400

----------------------------------------------------------------------
 .../apache/lucene/index/MergePolicyWrapper.java | 45 --------------------
 .../lucene/index/TestFilterMergePolicy.java     |  3 +-
 2 files changed, 1 insertion(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e2e89d1a/lucene/core/src/java/org/apache/lucene/index/MergePolicyWrapper.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicyWrapper.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicyWrapper.java
deleted file mode 100644
index 4731e5e..0000000
--- a/lucene/core/src/java/org/apache/lucene/index/MergePolicyWrapper.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.io.IOException;
-
-import org.apache.lucene.util.IOSupplier;
-
-/**
- * A wrapper for {@link MergePolicy} instances.
- * @deprecated replace with FilterMergePolicy
- * @lucene.experimental
- */
-@Deprecated
-public class MergePolicyWrapper extends FilterMergePolicy {
-  // placeholder for backwards compatibility
-
-  /**
-   * Creates a new merge policy instance wrapping another.
-   *
-   * @param in the wrapped {@link MergePolicy}
-   */
-  public MergePolicyWrapper(MergePolicy in) {
-    super(in);
-  }
-
-  @Override
-  public int numDeletesToMerge(SegmentCommitInfo info, int pendingDeleteCount, IOSupplier<CodecReader> readerSupplier) throws IOException {
-    return in.numDeletesToMerge(info, pendingDeleteCount, readerSupplier);
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e2e89d1a/lucene/core/src/test/org/apache/lucene/index/TestFilterMergePolicy.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFilterMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestFilterMergePolicy.java
index 4057d3c..58baa5d 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestFilterMergePolicy.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestFilterMergePolicy.java
@@ -29,9 +29,8 @@ public class TestFilterMergePolicy extends LuceneTestCase {
       try {
         FilterMergePolicy.class.getDeclaredMethod(m.getName(),  m.getParameterTypes());
       } catch (NoSuchMethodException e) {
-        fail("MergePolicyWrapper needs to override '" + m + "'");
+        fail("FilterMergePolicy needs to override '" + m + "'");
       }
     }
   }
-
 }


[28/46] lucene-solr:jira/solr-11833: SOLR-12181: Fix NPE. Disable the test until it's fixed.

Posted by ab...@apache.org.
SOLR-12181: Fix NPE. Disable the test until it's fixed.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/dfc05faa
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/dfc05faa
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/dfc05faa

Branch: refs/heads/jira/solr-11833
Commit: dfc05faa352c7037c4924aafb2f5746872e36448
Parents: e851b89
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Fri Apr 13 12:44:59 2018 +0200
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Fri Apr 13 12:45:54 2018 +0200

----------------------------------------------------------------------
 .../apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java | 2 ++
 .../solr/cloud/autoscaling/sim/SimClusterStateProvider.java | 9 +++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dfc05faa/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
index 79dd019..3bf7021 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
@@ -29,6 +29,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
 
+import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrRequest;
@@ -64,6 +65,7 @@ import static org.apache.solr.common.cloud.ZkStateReader.SOLR_AUTOSCALING_CONF_P
  *
  */
 @LogLevel("org.apache.solr.cloud.autoscaling=DEBUG")
+@LuceneTestCase.AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12181")
 public class IndexSizeTriggerTest extends SolrCloudTestCase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dfc05faa/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
index 9b3782a..12aa9c1 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/SimClusterStateProvider.java
@@ -1049,9 +1049,14 @@ public class SimClusterStateProvider implements ClusterStateProvider {
         for (String id : deletes) {
           Slice s = router.getTargetSlice(id, null, null, req.getParams(), coll);
           // NOTE: we don't use getProperty because it uses PROPERTY_PROP_PREFIX
-          String numDocsStr = s.getLeader().getStr("SEARCHER.searcher.numDocs");
+          Replica leader = s.getLeader();
+          if (leader == null) {
+            LOG.debug("-- no leader in " + s);
+            continue;
+          }
+          String numDocsStr = leader.getStr("SEARCHER.searcher.numDocs");
           if (numDocsStr == null) {
-            LOG.debug("-- no docs in " + s.getLeader());
+            LOG.debug("-- no docs in " + leader);
             continue;
           }
           long numDocs = Long.parseLong(numDocsStr);


[19/46] lucene-solr:jira/solr-11833: LUCENE-8245: Make precommit happy, again.

Posted by ab...@apache.org.
LUCENE-8245: Make precommit happy, again.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/1d201f3c
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/1d201f3c
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/1d201f3c

Branch: refs/heads/jira/solr-11833
Commit: 1d201f3c18ef150132e329bac6bb8ecc3ca8c4e0
Parents: 3d5f2f2
Author: Karl Wright <Da...@gmail.com>
Authored: Thu Apr 12 23:32:46 2018 -0400
Committer: Karl Wright <Da...@gmail.com>
Committed: Thu Apr 12 23:32:46 2018 -0400

----------------------------------------------------------------------
 .../org/apache/lucene/spatial3d/geom/SidedPlane.java    | 12 ++++++++++++
 1 file changed, 12 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1d201f3c/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/SidedPlane.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/SidedPlane.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/SidedPlane.java
index 238933c..61d561d 100755
--- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/SidedPlane.java
+++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/SidedPlane.java
@@ -234,12 +234,24 @@ public class SidedPlane extends Plane implements Membership {
     return sigNum == this.sigNum;
   }
 
+  /**
+   * Check whether a point is strictly within a plane.
+   * @param v is the point.
+   * @return true if within.
+   */
   public boolean strictlyWithin(final Vector v) {
     double evalResult = evaluate(v.x, v.y, v.z);
     double sigNum = Math.signum(evalResult);
     return sigNum == 0.0 || sigNum == this.sigNum;
   }
 
+  /**
+   * Check whether a point is strictly within a plane.
+   * @param x is the point x value.
+   * @param y is the point y value.
+   * @param z is the point z value.
+   * @return true if within.
+   */
   public boolean strictlyWithin(double x, double y, double z) {
     double evalResult = evaluate(x, y, z);
     double sigNum = Math.signum(evalResult);

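strictlyWithin evaluates the plane equation at the point and compares the sign of the result with the plane's stored sign; a zero result, i.e. a point exactly on the plane, also counts as within, following the code above. A minimal stand-alone sketch of that sidedness test (plain Java; the real SidedPlane also carries bounds handling and numerical tolerances):

    public class SidedPlaneSketch {
      // Plane a*x + b*y + c*z + d = 0, with sigNum recording the "inside" side.
      final double a, b, c, d;
      final double sigNum;

      SidedPlaneSketch(double a, double b, double c, double d,
                       double insideX, double insideY, double insideZ) {
        this.a = a; this.b = b; this.c = c; this.d = d;
        this.sigNum = Math.signum(evaluate(insideX, insideY, insideZ));
      }

      double evaluate(double x, double y, double z) {
        return a * x + b * y + c * z + d;
      }

      // Within when the point is on the plane or on the same side as the
      // reference "inside" point used at construction time.
      boolean strictlyWithin(double x, double y, double z) {
        double s = Math.signum(evaluate(x, y, z));
        return s == 0.0 || s == this.sigNum;
      }

      public static void main(String[] args) {
        SidedPlaneSketch p = new SidedPlaneSketch(0, 0, 1, 0, 0, 0, 1);  // x-y plane, inside is +z
        System.out.println(p.strictlyWithin(1, 2, 0.5));    // true (same side)
        System.out.println(p.strictlyWithin(1, 2, 0));      // true (on the plane)
        System.out.println(p.strictlyWithin(1, 2, -0.5));   // false
      }
    }
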

[40/46] lucene-solr:jira/solr-11833: SOLR-11724: Cdcr bootstrapping should ensure that non-leader replicas should sync with the leader

Posted by ab...@apache.org.
SOLR-11724: Cdcr bootstrapping should ensure that non-leader replicas should sync with the leader


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/93f9a65b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/93f9a65b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/93f9a65b

Branch: refs/heads/jira/solr-11833
Commit: 93f9a65b1c8aa460489fdce50ed84d18168b53ef
Parents: e4eb8a8
Author: Varun Thacker <va...@apache.org>
Authored: Fri Apr 13 12:07:42 2018 -0700
Committer: Varun Thacker <va...@apache.org>
Committed: Fri Apr 13 15:38:26 2018 -0700

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  3 ++
 .../solr/handler/CdcrReplicatorManager.java     | 29 +++++++++++++++
 .../solr/cloud/cdcr/CdcrBootstrapTest.java      |  5 +--
 .../apache/solr/cloud/cdcr/CdcrTestsUtil.java   | 38 ++++++++++++++++++++
 4 files changed, 73 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/93f9a65b/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index de748be..b39fd6f 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -156,6 +156,9 @@ Bug Fixes
 * SOLR-12150: Fix a test bug in CdcrBidirectionalTest.testBiDir (Steve Rowe, Amrit Sarkar via Varun Thacker)
 
 * SOLR-10513: ConjunctionSolrSpellChecker did not work with LuceneLevenshteinDistance (Amrit Sarkar via James Dyer)
+
+* SOLR-11724: Cdcr bootstrapping should ensure that non-leader replicas should sync with the leader
+  (Amrit Sarkar, Varun Thacker)
  
 Optimizations
 ----------------------

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/93f9a65b/solr/core/src/java/org/apache/solr/handler/CdcrReplicatorManager.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/CdcrReplicatorManager.java b/solr/core/src/java/org/apache/solr/handler/CdcrReplicatorManager.java
index abd6ed7..8ec3c8b 100644
--- a/solr/core/src/java/org/apache/solr/handler/CdcrReplicatorManager.java
+++ b/solr/core/src/java/org/apache/solr/handler/CdcrReplicatorManager.java
@@ -20,6 +20,7 @@ import java.io.Closeable;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Locale;
@@ -36,11 +37,14 @@ import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.impl.CloudSolrClient.Builder;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.CoreAdminRequest;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.ZkCoreNodeProps;
 import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.CoreAdminParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.ExecutorUtil;
@@ -298,6 +302,8 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
                 checkpoint, collectionName, shard);
             CdcrUpdateLog.CdcrLogReader reader1 = ulog.newLogReader();
             reader1.seek(checkpoint);
+            // issue asynchronous request_recovery to the follower nodes of the shards of target collection
+            sendRequestRecoveryToFollowers(state);
             success = true;
             break;
           } else if (status == BootstrapStatus.FAILED) {
@@ -411,6 +417,29 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
     return client.request(request);
   }
 
+  private void sendRequestRecoveryToFollowers(CdcrReplicatorState state) throws SolrServerException, IOException {
+    Collection<Slice> slices = state.getClient().getZkStateReader().getClusterState().getCollection(state.getTargetCollection()).getActiveSlices();
+    for (Slice slice : slices) {
+      Collection<Replica> replicas = slice.getReplicas();
+      for (Replica replica : replicas) {
+        if (slice.getLeader().getCoreName().equals(replica.getCoreName())) {
+          continue; // no need to request recovery for leader
+        }
+        sendRequestRecoveryToFollower(state.getClient(), replica.getCoreName());
+        log.info("RequestRecovery cmd is issued by core: " + replica.getCoreName() + " of shard: " + slice.getName() +
+            " for target: " + state.getTargetCollection());
+      }
+    }
+  }
+
+  private NamedList sendRequestRecoveryToFollower(SolrClient client, String coreName) throws SolrServerException, IOException {
+    CoreAdminRequest.RequestRecovery recoverRequestCmd = new CoreAdminRequest.RequestRecovery();
+    recoverRequestCmd.setAction(CoreAdminParams.CoreAdminAction.REQUESTRECOVERY);
+    recoverRequestCmd.setCoreName(coreName);
+    return client.request(recoverRequestCmd);
+  }
+
+
   private enum BootstrapStatus  {
     SUBMITTED,
     RUNNING,

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/93f9a65b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java
index cae9855..543bd5c 100644
--- a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java
@@ -105,7 +105,8 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
 
         // setup the target cluster
         target.uploadConfigSet(configset("cdcr-target"), "cdcr-target");
-        CollectionAdminRequest.createCollection("cdcr-target", "cdcr-target", 1, 1)
+        CollectionAdminRequest.createCollection("cdcr-target", "cdcr-target", 1, 2)
+            .setMaxShardsPerNode(2)
             .process(target.getSolrClient());
         CloudSolrClient targetSolrClient = target.getSolrClient();
         targetSolrClient.setDefaultCollection("cdcr-target");
@@ -118,6 +119,7 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
         log.info("Cdcr queue response: " + response.getResponse());
         long foundDocs = CdcrTestsUtil.waitForClusterToSync(numDocs, targetSolrClient);
         assertEquals("Document mismatch on target after sync", numDocs, foundDocs);
+        assertTrue(CdcrTestsUtil.assertShardInSync("cdcr-target", "shard1", targetSolrClient)); // with more than 1 replica
 
         params = new ModifiableSolrParams();
         params.set(CommonParams.ACTION, CdcrParams.CdcrAction.COLLECTIONCHECKPOINT.toString());
@@ -300,5 +302,4 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
       target.shutdown();
     }
   }
-
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/93f9a65b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java
index 99aa471..6a186fd 100644
--- a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java
+++ b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java
@@ -26,11 +26,17 @@ import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.handler.CdcrParams;
+import org.apache.solr.util.TimeOut;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -108,4 +114,36 @@ public class CdcrTestsUtil extends SolrTestCaseJ4{
     }
     return response != null ? response.getResults().getNumFound() : 0;
   }
+
+  protected static boolean assertShardInSync(String collection, String shard, CloudSolrClient client) throws IOException, SolrServerException {
+    TimeOut waitTimeOut = new TimeOut(30, TimeUnit.SECONDS, TimeSource.NANO_TIME);
+    DocCollection docCollection = client.getZkStateReader().getClusterState().getCollection(collection);
+    Slice correctSlice = null;
+    for (Slice slice : docCollection.getSlices()) {
+      if (shard.equals(slice.getName())) {
+        correctSlice = slice;
+        break;
+      }
+    }
+    assertNotNull(correctSlice);
+
+    long leaderDocCount;
+    try (HttpSolrClient leaderClient = new HttpSolrClient.Builder(correctSlice.getLeader().getCoreUrl()).withHttpClient(client.getHttpClient()).build()) {
+      leaderDocCount = leaderClient.query(new SolrQuery("*:*").setParam("distrib", "false")).getResults().getNumFound();
+    }
+
+    while (!waitTimeOut.hasTimedOut()) {
+      int replicasInSync = 0;
+      for (Replica replica : correctSlice.getReplicas()) {
+        try (HttpSolrClient leaderClient = new HttpSolrClient.Builder(replica.getCoreUrl()).withHttpClient(client.getHttpClient()).build()) {
+          long replicaDocCount = leaderClient.query(new SolrQuery("*:*").setParam("distrib", "false")).getResults().getNumFound();
+          if (replicaDocCount == leaderDocCount) replicasInSync++;
+        }
+      }
+      if (replicasInSync == correctSlice.getReplicas().size()) {
+        return true;
+      }
+    }
+    return false;
+  }
 }

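assertShardInSync polls every replica's local doc count (distrib=false) until it matches the leader's count or the 30 second TimeOut expires. A generic, self-contained sketch of that wait-until-condition pattern (made-up names; the real helper issues Solr queries inside the loop):

    import java.util.concurrent.TimeUnit;
    import java.util.function.BooleanSupplier;

    public class WaitUntilSketch {

      // Poll a condition until it becomes true or the timeout elapses.
      static boolean waitUntil(BooleanSupplier condition, long timeout, TimeUnit unit)
          throws InterruptedException {
        long deadline = System.nanoTime() + unit.toNanos(timeout);
        while (System.nanoTime() < deadline) {
          if (condition.getAsBoolean()) {
            return true;
          }
          Thread.sleep(100);   // back off between polls instead of spinning
        }
        return condition.getAsBoolean();   // one last check at the deadline
      }

      public static void main(String[] args) throws InterruptedException {
        long start = System.currentTimeMillis();
        boolean done = waitUntil(() -> System.currentTimeMillis() - start > 300,
                                 2, TimeUnit.SECONDS);
        System.out.println(done);   // true, after roughly 300 ms of polling
      }
    }
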

[29/46] lucene-solr:jira/solr-11833: LUCENE-8231: update CHANGES.txt after backport to 7x

Posted by ab...@apache.org.
LUCENE-8231: update CHANGES.txt after backport to 7x


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0544486b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0544486b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0544486b

Branch: refs/heads/jira/solr-11833
Commit: 0544486b3912c88cadf0a0307074c0bf255d5415
Parents: dfc05fa
Author: Jim Ferenczi <ji...@apache.org>
Authored: Fri Apr 13 14:21:03 2018 +0200
Committer: Jim Ferenczi <ji...@apache.org>
Committed: Fri Apr 13 14:21:03 2018 +0200

----------------------------------------------------------------------
 lucene/CHANGES.txt | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0544486b/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index a0e339e..58d234f 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -41,12 +41,6 @@ API Changes
 * LUCENE-8242: Deprecated method IndexSearcher#createNormalizedWeight() has
   been removed (Alan Woodward)
 
-New Features
-
-* LUCENE-8231: A new analysis module (nori) similar to Kuromoji
-  but to handle Korean using mecab-ko-dic and morphological analysis.
-  (Robert Muir, Jim Ferenczi)
-
 Changes in Runtime Behavior
 
 * LUCENE-7837: Indices that were created before the previous major version
@@ -149,6 +143,10 @@ New Features
   helps merge policies in the soft-delete case to correctly implement retention
   policies without triggering unnecessary merges. (Simon Willnauer, Mike McCandless)
 
+* LUCENE-8231: A new analysis module (nori) similar to Kuromoji
+  but to handle Korean using mecab-ko-dic and morphological analysis.
+  (Robert Muir, Jim Ferenczi)
+
 Bug Fixes
 
 * LUCENE-8234: Fixed bug in how spatial relationship is computed for


[16/46] lucene-solr:jira/solr-11833: SOLR-11336: DocBasedVersionConstraints URP is now more extensible; versionField is a list

Posted by ab...@apache.org.
SOLR-11336: DocBasedVersionConstraints URP is now more extensible; versionField is a list


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/3d5f2f24
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/3d5f2f24
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/3d5f2f24

Branch: refs/heads/jira/solr-11833
Commit: 3d5f2f24c346913eca3b932682950f82d2e4c3f9
Parents: 4f694d5
Author: David Smiley <ds...@apache.org>
Authored: Thu Apr 12 14:04:28 2018 -0400
Committer: David Smiley <ds...@apache.org>
Committed: Thu Apr 12 14:04:28 2018 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   3 +
 .../DocBasedVersionConstraintsProcessor.java    | 512 +++++++++++++++++++
 ...BasedVersionConstraintsProcessorFactory.java | 441 +++-------------
 .../solrconfig-externalversionconstraint.xml    |  32 +-
 .../update/TestDocBasedVersionConstraints.java  |  94 +++-
 .../src/updating-parts-of-documents.adoc        |  10 +-
 6 files changed, 694 insertions(+), 398 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3d5f2f24/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 0c2feaf..c2e6da7 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -93,6 +93,9 @@ New Features
 * SOLR-11982: Add possibility to define replica order with the shards.preference parameter to e.g. prefer PULL replicas
   for distributed queries. (Ere Maijala, Tomás Fernández Löbbe)
 
+* SOLR-11336: DocBasedVersionConstraintsProcessorFactory is more extensible and now supports a list of versioned fields.
+  (versionField config may now be a comma-delimited list). (Michael Braun via David Smiley)
+
 Bug Fixes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3d5f2f24/solr/core/src/java/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessor.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessor.java
new file mode 100644
index 0000000..5bc60ec
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessor.java
@@ -0,0 +1,512 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.queries.function.FunctionValues;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.component.RealTimeGetComponent;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.update.AddUpdateCommand;
+import org.apache.solr.update.DeleteUpdateCommand;
+import org.apache.solr.update.UpdateCommand;
+import org.apache.solr.util.RefCounted;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.solr.common.SolrException.ErrorCode.BAD_REQUEST;
+import static org.apache.solr.common.SolrException.ErrorCode.CONFLICT;
+import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
+import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM;
+
+public class DocBasedVersionConstraintsProcessor extends UpdateRequestProcessor {
+  private static final String[] EMPTY_STR_ARR = new String[0];
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private final String[] versionFieldNames;
+  private final SchemaField[] userVersionFields;
+  private final SchemaField solrVersionField;
+  private final boolean ignoreOldUpdates;
+  private final String[] deleteVersionParamNames;
+  private final SolrCore core;
+
+  private final DistributedUpdateProcessor distribProc;  // the distributed update processor following us
+  private final DistributedUpdateProcessor.DistribPhase phase;
+  private final boolean useFieldCache;
+
+  private long oldSolrVersion;  // current _version_ of the doc in the index/update log
+
+  public DocBasedVersionConstraintsProcessor(List<String> versionFields,
+                                             boolean ignoreOldUpdates,
+                                             List<String> deleteVersionParamNames,
+                                             boolean useFieldCache,
+                                             SolrQueryRequest req,
+                                             SolrQueryResponse rsp,
+                                             UpdateRequestProcessor next ) {
+    super(next);
+    this.ignoreOldUpdates = ignoreOldUpdates;
+    this.deleteVersionParamNames = deleteVersionParamNames.toArray(EMPTY_STR_ARR);
+    this.core = req.getCore();
+    this.versionFieldNames = versionFields.toArray(EMPTY_STR_ARR);
+    IndexSchema schema = core.getLatestSchema();
+    userVersionFields = new SchemaField[versionFieldNames.length];
+    for (int i = 0; i < versionFieldNames.length; i++) {
+      userVersionFields[i] = schema.getField(versionFieldNames[i]);
+    }
+    this.solrVersionField = schema.getField(CommonParams.VERSION_FIELD);
+    this.useFieldCache = useFieldCache;
+
+    this.distribProc = getDistributedUpdateProcessor(next);
+
+    this.phase = DistributedUpdateProcessor.DistribPhase.parseParam(req.getParams().get(DISTRIB_UPDATE_PARAM));
+  }
+
+  private static DistributedUpdateProcessor getDistributedUpdateProcessor(UpdateRequestProcessor next) {
+    for (UpdateRequestProcessor proc = next; proc != null; proc = proc.next) {
+      if (proc instanceof DistributedUpdateProcessor) {
+        return (DistributedUpdateProcessor)proc;
+      }
+    }
+
+    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "DistributedUpdateProcessor must follow DocBasedVersionConstraintsProcessor");
+  }
+
+  /**
+   * Inspects a raw field value (which may come from a doc in the index, or a
+   * doc in the UpdateLog that still has String values, or a String sent by
+   * the user as a param) and if it is a String, asks the versionField FieldType
+   * to convert it to an Object suitable for comparison.
+   */
+  private static Object convertFieldValueUsingType(final Object rawValue, SchemaField field) {
+    if (rawValue instanceof CharSequence) {
+      // in theory, the FieldType might still be CharSequence based,
+      // but in that case trust it to do an identity conversion...
+      FieldType fieldType = field.getType();
+      BytesRefBuilder term = new BytesRefBuilder();
+      fieldType.readableToIndexed((CharSequence)rawValue, term);
+      return fieldType.toObject(field, term.get());
+    }
+    // else...
+    return rawValue;
+  }
+
+  private static Object[] convertFieldValuesUsingType(Object[] rawValues, SchemaField[] fields) {
+    Object[] returnArr = new Object[rawValues.length];
+    for (int i = 0; i < returnArr.length; i++) {
+      returnArr[i] = convertFieldValueUsingType(rawValues[i], fields[i]);
+    }
+    return returnArr;
+  }
+
+  /**
+   * Returns true if the specified new version values are greater than the ones
+   * already known to exist for the document, or if the document does not already
+   * exist.
+   * Returns false if the specified new versions are not high enough but the
+   * processor has been configured with ignoreOldUpdates=true.
+   * Throws a SolrException if the versions are not high enough and
+   * ignoreOldUpdates=false.
+   */
+  private boolean isVersionNewEnough(BytesRef indexedDocId,
+                                     Object[] newUserVersions) throws IOException {
+    assert null != indexedDocId;
+    assert null != newUserVersions;
+
+    newUserVersions = convertFieldValuesUsingType(newUserVersions, userVersionFields);
+
+    final DocFoundAndOldUserAndSolrVersions docFoundAndOldUserVersions;
+    if (useFieldCache) {
+      docFoundAndOldUserVersions = getOldUserVersionsFromFieldCache(indexedDocId);
+    } else {
+      docFoundAndOldUserVersions = getOldUserVersionsFromStored(indexedDocId);
+    }
+    oldSolrVersion = docFoundAndOldUserVersions.oldSolrVersion;
+
+    if (!docFoundAndOldUserVersions.found) {
+      return true;
+    }
+    final Object[] oldUserVersions = docFoundAndOldUserVersions.oldUserVersions;
+
+    validateUserVersions(oldUserVersions, versionFieldNames, "Doc exists in index, but has null versionField: ");
+
+    return versionInUpdateIsAcceptable(newUserVersions, oldUserVersions);
+  }
+
+  private static void validateUserVersions(Object[] userVersions, String[] fieldNames, String errorMessage) {
+    assert userVersions.length == fieldNames.length;
+    for (int i = 0; i < userVersions.length; i++) {
+      Object userVersion = userVersions[i];
+      if ( null == userVersion) {
+        // could happen if they turn this feature on after building an index
+        // w/o the versionField, or if a new doc being validated does not include it.
+        throw new SolrException(SERVER_ERROR, errorMessage + fieldNames[i]);
+      }
+    }
+  }
+
+  private DocFoundAndOldUserAndSolrVersions getOldUserVersionsFromFieldCache(BytesRef indexedDocId) {
+    SolrInputDocument oldDoc = RealTimeGetComponent.getInputDocumentFromTlog(core, indexedDocId, null, null, true);
+    if (oldDoc == RealTimeGetComponent.DELETED) {
+      return DocFoundAndOldUserAndSolrVersions.NOT_FOUND;
+    }
+    if (oldDoc == null) {
+      // need to look up in index now...
+      RefCounted<SolrIndexSearcher> newestSearcher = core.getRealtimeSearcher();
+      try {
+        SolrIndexSearcher searcher = newestSearcher.get();
+        long lookup = searcher.lookupId(indexedDocId);
+        if (lookup < 0) {
+          // doc not in index either...
+          return DocFoundAndOldUserAndSolrVersions.NOT_FOUND;
+        }
+        final LeafReaderContext segmentContext = searcher.getTopReaderContext().leaves().get((int)(lookup>>32));
+        final int docIdInSegment = (int)lookup;
+
+        long oldSolrVersion = getFunctionValues(segmentContext, solrVersionField, searcher).longVal(docIdInSegment);
+        Object[] oldUserVersions = getObjectValues(segmentContext, userVersionFields, searcher, docIdInSegment);
+        return new DocFoundAndOldUserAndSolrVersions(oldUserVersions, oldSolrVersion);
+      } catch (IOException e) {
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error reading version from index", e);
+      } finally {
+        if (newestSearcher != null) { //TODO can this ever be null?
+          newestSearcher.decref();
+        }
+      }
+    } else {
+      return getUserVersionAndSolrVersionFromDocument(oldDoc);
+    }
+  }
+
+  private DocFoundAndOldUserAndSolrVersions getOldUserVersionsFromStored(BytesRef indexedDocId) throws IOException {
+    // stored fields only...
+    SolrInputDocument oldDoc = RealTimeGetComponent.getInputDocument(core, indexedDocId);
+    if (null == oldDoc) {
+      return DocFoundAndOldUserAndSolrVersions.NOT_FOUND;
+    } else {
+      return getUserVersionAndSolrVersionFromDocument(oldDoc);
+    }
+  }
+
+  private static final class DocFoundAndOldUserAndSolrVersions {
+    private static final DocFoundAndOldUserAndSolrVersions NOT_FOUND = new DocFoundAndOldUserAndSolrVersions();
+    private final boolean found;
+    private final Object[] oldUserVersions;
+    private final long oldSolrVersion;
+
+    private DocFoundAndOldUserAndSolrVersions() {
+      this.found = false;
+      this.oldSolrVersion = -1;
+      this.oldUserVersions = null;
+    }
+
+    private DocFoundAndOldUserAndSolrVersions(Object[] oldUserVersions, long oldSolrVersion) {
+      this.found = true;
+      this.oldUserVersions = oldUserVersions;
+      this.oldSolrVersion = oldSolrVersion;
+    }
+  }
+
+  private DocFoundAndOldUserAndSolrVersions getUserVersionAndSolrVersionFromDocument(SolrInputDocument oldDoc) {
+    Object[] oldUserVersions = getUserVersionsFromDocument(oldDoc);
+
+    Object o = oldDoc.getFieldValue(solrVersionField.getName());
+    if (o == null) {
+      throw new SolrException(SERVER_ERROR, "No _version_ for document " + oldDoc);
+    }
+    long solrVersion = o instanceof Number ? ((Number) o).longValue() : Long.parseLong(o.toString());
+
+    return new DocFoundAndOldUserAndSolrVersions(oldUserVersions, solrVersion);
+  }
+
+  private Object[] getUserVersionsFromDocument(SolrInputDocument doc) {
+    Object[] versions = new Object[versionFieldNames.length];
+    for (int i = 0; i < versionFieldNames.length; i++) {
+      String fieldName = versionFieldNames[i];
+      SchemaField schemaField = userVersionFields[i];
+      Object userVersion = doc.getFieldValue(fieldName);
+      // Make the FieldType resolve any conversion we need.
+      userVersion = convertFieldValueUsingType(userVersion, schemaField);
+      versions[i] = userVersion;
+    }
+    return versions;
+  }
+
+
+
+  /**
+   * Returns whether or not the versions in the command are acceptable to be indexed.
+   * If the instance is set to ignoreOldUpdates==false, it will throw a SolrException
+   * with CONFLICT in the event the version is not acceptable rather than return false.
+   *
+   * @param newUserVersions New versions in update request
+   * @param oldUserVersions Old versions currently in solr index
+   * @return True if acceptable, false if not (or will throw exception)
+   */
+  protected boolean versionInUpdateIsAcceptable(Object[] newUserVersions,
+                                                Object[] oldUserVersions) {
+
+    for (int i = 0; i < oldUserVersions.length; i++) {
+      Object oldUserVersion = oldUserVersions[i];
+      Object newUserVersion = newUserVersions[i];
+
+      if (!(oldUserVersion instanceof Comparable && newUserVersion instanceof Comparable)) {
+        throw new SolrException(BAD_REQUEST,
+            "old version and new version are not comparable: " +
+                oldUserVersion.getClass() + " vs " + newUserVersion.getClass());
+      }
+      try {
+        if (newUpdateComparePasses((Comparable) newUserVersion, (Comparable) oldUserVersion, versionFieldNames[i])) {
+          return true;
+        }
+      } catch (ClassCastException e) {
+        throw new SolrException(BAD_REQUEST,
+            "old version and new version are not comparable: " +
+                oldUserVersion.getClass() + " vs " + newUserVersion.getClass() +
+                ": " + e.getMessage(), e);
+
+      }
+    }
+    if (ignoreOldUpdates) {
+      if (log.isDebugEnabled()) {
+        log.debug("Dropping update since user version is not high enough: {}; old user version={}",
+            Arrays.toString(newUserVersions), Arrays.toString(oldUserVersions));
+      }
+      return false;
+    } else {
+      throw new SolrException(CONFLICT,
+          "user version is not high enough: " + Arrays.toString(newUserVersions));
+    }
+  }
+
+  /**
+   * Given two comparable user versions, returns whether the new version is acceptable
+   * to replace the old version.
+   * @param newUserVersion User-specified version on the new version of the document
+   * @param oldUserVersion User-specified version on the old version of the document
+   * @param userVersionFieldName Field name of the user versions being compared
+   * @return True if acceptable, false if not.
+   */
+  protected boolean newUpdateComparePasses(Comparable newUserVersion, Comparable oldUserVersion, String userVersionFieldName) {
+    return newUserVersion.compareTo(oldUserVersion) > 0;
+  }
+
+  private static Object[] getObjectValues(LeafReaderContext segmentContext,
+                                          SchemaField[] fields,
+                                          SolrIndexSearcher searcher,
+                                          int docIdInSegment) throws IOException {
+    FunctionValues[] functionValues = getManyFunctionValues(segmentContext, fields, searcher);
+    Object[] objectValues = new Object[functionValues.length];
+    for (int i = 0; i < functionValues.length; i++) {
+      objectValues[i] = functionValues[i].objectVal(docIdInSegment);
+    }
+    return objectValues;
+  }
+
+  private static FunctionValues[] getManyFunctionValues(LeafReaderContext segmentContext,
+                                                SchemaField[] fields,
+                                                SolrIndexSearcher searcher) throws IOException {
+    FunctionValues[] values = new FunctionValues[fields.length];
+    for (int i = 0; i < fields.length; i++) {
+      values[i] = getFunctionValues(segmentContext, fields[i], searcher);
+    }
+    return values;
+  }
+
+  private static FunctionValues getFunctionValues(LeafReaderContext segmentContext,
+                                          SchemaField field,
+                                          SolrIndexSearcher searcher) throws IOException {
+    ValueSource vs = field.getType().getValueSource(field, null);
+    Map context = ValueSource.newContext(searcher);
+    vs.createWeight(context, searcher);
+    return vs.getValues(context, segmentContext);
+  }
+
+  private boolean isNotLeader(UpdateCommand cmd) {
+    if ((cmd.getFlags() & (UpdateCommand.REPLAY | UpdateCommand.PEER_SYNC)) != 0) {
+      return true;
+    }
+    if (phase == DistributedUpdateProcessor.DistribPhase.FROMLEADER) {
+      return true;
+    }
+    // if phase==TOLEADER, we can't just assume we are the leader... let the normal logic check.
+    return !distribProc.isLeader(cmd);
+  }
+
+  @Override
+  public void processAdd(AddUpdateCommand cmd) throws IOException {
+    if (isNotLeader(cmd)) {
+      super.processAdd(cmd);
+      return;
+    }
+
+    final SolrInputDocument newDoc = cmd.getSolrInputDocument();
+    Object[] newVersions = getUserVersionsFromDocument(newDoc);
+    validateUserVersions(newVersions, versionFieldNames, "Doc does not have versionField: ");
+
+    for (int i=0; ;i++) {
+      logOverlyFailedRetries(i, cmd);
+
+      if (!isVersionNewEnough(cmd.getIndexedId(), newVersions)) {
+        // drop older update
+        return;
+      }
+
+      try {
+        cmd.setVersion(oldSolrVersion);  // use optimistic concurrency to ensure that the doc has not changed in the meantime
+        super.processAdd(cmd);
+        return;
+      } catch (SolrException e) {
+        if (e.code() == 409) {
+          continue;  // if a version conflict, retry
+        }
+        throw e;  // rethrow
+      }
+
+    }
+  }
+
+  private static void logOverlyFailedRetries(int i, UpdateCommand cmd) {
+    // Log a warning every 256 retries.... even a few retries should normally be very unusual.
+    if ((i&0xff) == 0xff) {
+      log.warn("Unusual number of optimistic concurrency retries: retries=" + i + " cmd=" + cmd);
+    }
+  }
+
+  @Override
+  public void processDelete(DeleteUpdateCommand cmd) throws IOException {
+    if (deleteVersionParamNames.length == 0) {
+      // not supposed to look at deletes at all
+      super.processDelete(cmd);
+      return;
+    }
+
+    if ( ! cmd.isDeleteById() ) {
+      // nothing to do
+      super.processDelete(cmd);
+      return;
+    }
+
+    String[] deleteParamValues = getDeleteParamValuesFromRequest(cmd);
+    validateDeleteParamValues(deleteParamValues);
+
+
+    if (isNotLeader(cmd)) {
+      // transform delete to add earlier rather than later
+
+      SolrInputDocument newDoc = new SolrInputDocument();
+      newDoc.setField(core.getLatestSchema().getUniqueKeyField().getName(),
+          cmd.getId());
+      setDeleteParamValues(newDoc, deleteParamValues);
+
+      AddUpdateCommand newCmd = new AddUpdateCommand(cmd.getReq());
+      newCmd.solrDoc = newDoc;
+      newCmd.commitWithin = cmd.commitWithin;
+      super.processAdd(newCmd);
+      return;
+    }
+
+
+    for (int i=0; ;i++) {
+
+      logOverlyFailedRetries(i, cmd);
+
+      if (!isVersionNewEnough(cmd.getIndexedId(), deleteParamValues)) {
+        // drop this older update
+        return;
+      }
+
+      // :TODO: should this logic be split and driven by two params?
+      //   - deleteVersionParam to do a version check
+      //   - some new boolean param to determine if a stub document gets added in place?
+      try {
+        // drop the delete, and instead propagate an AddDoc that
+        // replaces the doc with a new "empty" one that records the deleted version
+
+        SolrInputDocument newDoc = new SolrInputDocument();
+        newDoc.setField(core.getLatestSchema().getUniqueKeyField().getName(),
+            cmd.getId());
+        setDeleteParamValues(newDoc, deleteParamValues);
+
+        AddUpdateCommand newCmd = new AddUpdateCommand(cmd.getReq());
+        newCmd.solrDoc = newDoc;
+        newCmd.commitWithin = cmd.commitWithin;
+
+        newCmd.setVersion(oldSolrVersion);  // use optimistic concurrency to ensure that the doc has not changed in the meantime
+        super.processAdd(newCmd);
+        return;
+      } catch (SolrException e) {
+        if (e.code() == 409) {
+          continue;  // if a version conflict, retry
+        }
+        throw e;  // rethrow
+      }
+
+    }
+  }
+
+  private String[] getDeleteParamValuesFromRequest(DeleteUpdateCommand cmd) {
+    SolrParams params = cmd.getReq().getParams();
+    String[] returnArr = new String[deleteVersionParamNames.length];
+    for (int i = 0; i < deleteVersionParamNames.length; i++) {
+      String deleteVersionParamName = deleteVersionParamNames[i];
+      String deleteParamValue = params.get(deleteVersionParamName);
+      returnArr[i] = deleteParamValue;
+    }
+    return returnArr;
+  }
+
+  private void validateDeleteParamValues(String[] values) {
+    for (int i = 0; i < values.length; i++) {
+      String deleteParamValue = values[i];
+      if (null == deleteParamValue) {
+        String deleteVersionParamName = deleteVersionParamNames[i];
+        throw new SolrException(BAD_REQUEST,
+            "Delete by ID must specify doc version param: " +
+                deleteVersionParamName);
+      }
+    }
+  }
+
+  private void setDeleteParamValues(SolrInputDocument doc, String[] values) {
+    for (int i = 0; i < values.length; i++) {
+      String versionFieldName = versionFieldNames[i];
+      String value = values[i];
+      doc.setField(versionFieldName, value);
+    }
+  }
+
+
+}
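
The two protected hooks above, versionInUpdateIsAcceptable and newUpdateComparePasses, are what make the refactored processor extensible. As a rough sketch only (the subclass name and the relaxed comparison rule below are hypothetical, not part of this patch), a plugin could override the per-field comparison, e.g. to also accept updates whose user version equals the stored one:

  package org.apache.solr.update.processor;

  import java.util.List;

  import org.apache.solr.request.SolrQueryRequest;
  import org.apache.solr.response.SolrQueryResponse;

  // Hypothetical subclass, shown only to illustrate the extension point added by this patch.
  public class LenientVersionConstraintsProcessor extends DocBasedVersionConstraintsProcessor {

    public LenientVersionConstraintsProcessor(List<String> versionFields, boolean ignoreOldUpdates,
                                              List<String> deleteVersionParamNames, boolean useFieldCache,
                                              SolrQueryRequest req, SolrQueryResponse rsp,
                                              UpdateRequestProcessor next) {
      super(versionFields, ignoreOldUpdates, deleteVersionParamNames, useFieldCache, req, rsp, next);
    }

    @Override
    protected boolean newUpdateComparePasses(Comparable newUserVersion, Comparable oldUserVersion,
                                             String userVersionFieldName) {
      // Accept a new document when its user version is greater than OR equal to the old one.
      return newUserVersion.compareTo(oldUserVersion) >= 0;
    }
  }

A matching factory would subclass DocBasedVersionConstraintsProcessorFactory and override getInstance to return this processor.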

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3d5f2f24/solr/core/src/java/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessorFactory.java
index 4790f36..ff4d78a 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessorFactory.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessorFactory.java
@@ -16,71 +16,63 @@
  */
 package org.apache.solr.update.processor;
 
-import org.apache.lucene.queries.function.FunctionValues;
-import org.apache.lucene.queries.function.ValueSource;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
+import java.lang.invoke.MethodHandles;
+import java.util.Collections;
+import java.util.List;
+
 import org.apache.solr.common.SolrException;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.core.SolrCore;
-import org.apache.solr.handler.component.RealTimeGetComponent;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.SchemaField;
-import org.apache.solr.search.SolrIndexSearcher;
-import org.apache.solr.update.AddUpdateCommand;
-import org.apache.solr.update.DeleteUpdateCommand;
-import org.apache.solr.update.UpdateCommand;
-import org.apache.solr.util.RefCounted;
 import org.apache.solr.util.plugin.SolrCoreAware;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.Map;
-
-import static org.apache.solr.common.SolrException.ErrorCode.BAD_REQUEST;
-import static org.apache.solr.common.SolrException.ErrorCode.CONFLICT;
 import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
-import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM;
 
 /**
  * <p>
  * This Factory generates an UpdateProcessor that helps to enforce Version 
  * constraints on documents based on per-document version numbers using a configured 
- * name of a <code>versionField</code>.  It should be configured on the "default"
+ * <code>versionField</code>, a comma-delimited list of fields to check for version
+ * numbers.  It should be configured on the "default"
  * update processor somewhere before the DistributedUpdateProcessorFactory.
  * As an example, see the solrconfig.xml that the tests use:
  * solr/core/src/test-files/solr/collection1/conf/solrconfig-externalversionconstraint.xml
  * </p>
  * <p>
- * When documents are added through this processor, if a document with the same 
- * unique key already exists in the collection, then the value of the 
- * <code>versionField</code> in the <i>existing</i> document is not less then the 
- * field value in the <i>new</i> document then the new document is rejected with a 
+ * When documents are added through this processor, if a document with the same
+ * unique key already exists in the collection, then the values within the fields
+ * as specified by the comma-delimited <code>versionField</code> property are checked,
+ * and if in the <i>existing</i> document the values for all fields are not less than the
+ * field values in the <i>new</i> document, then the new document is rejected with a
  * 409 Version Conflict error.
  * </p>
  * <p>
- * In addition to the mandatory <code>versionField</code> init param, two additional 
+ * In addition to the mandatory <code>versionField</code> init param, two additional
  * optional init params affect the behavior of this factory:
  * </p>
  * <ul>
- *   <li><code>deleteVersionParam</code> - This string parameter controls whether this 
- *     processor will intercept and inspect Delete By Id commands in addition to adding 
- *     documents.  If specified, then the value will specify the name of a request 
- *     paramater which becomes  mandatory for all Delete By Id commands.  This param 
- *     must then be used to specify the document version associated with the delete.
- *     If the version specified using this param is not greater then the value in the 
- *     <code>versionField</code> for any existing document, then the delete will fail 
- *     with a 409 Version Conflict error.  When using this param, Any Delete By Id 
- *     command with a high enough document version number to succeed will be internally 
- *     converted into an Add Document command that replaces the existing document with 
- *     a new one which is empty except for the Unique Key and <code>versionField</code> 
- *     to keeping a record of the deleted version so future Add Document commands will 
+ *   <li><code>deleteVersionParam</code> - This string parameter controls whether this
+ *     processor will intercept and inspect Delete By Id commands in addition to adding
+ *     documents.  If specified, then the value will specify the name(s) of the request
+ *     parameter(s) which become mandatory for all Delete By Id commands. Like
+ *     <code>versionField</code>, <code>deleteVersionParam</code> is comma-delimited.
+ *     For each of the params given, it specifies the document version associated with
+ *     the delete, where the index matches <code>versionField</code>. For example, if
+ *     <code>versionField</code> was set to 'a,b' and <code>deleteVersionParam</code>
+ *     was set to 'p1,p2', p1 should give the version for field 'a' and p2 should give
+ *     the version for field 'b'. If the versions specified using these params are not
+ *     greater then the value in the <code>versionField</code> for any existing document,
+ *     then the delete will fail with a 409 Version Conflict error.  When using this
+ *     param, Any Delete By Id command with a high enough document version number to
+ *     succeed will be internally converted into an Add Document command that replaces
+ *     the existing document with a new one which is empty except for the Unique Key
+ *     and fields corresponding to the fields listed in <code>versionField</code>
+ *     to keeping a record of the deleted version so future Add Document commands will
  *     fail if their "new" version is not high enough.</li>
  *
  *   <li><code>ignoreOldUpdates</code> - This boolean parameter defaults to 
@@ -95,8 +87,8 @@ public class DocBasedVersionConstraintsProcessorFactory extends UpdateRequestPro
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
   private boolean ignoreOldUpdates = false;
-  private String versionField = null;
-  private String deleteVersionParamName = null;
+  private List<String> versionFields = null;
+  private List<String> deleteVersionParamNames = Collections.emptyList();
   private boolean useFieldCache;
 
   @Override
@@ -104,23 +96,28 @@ public class DocBasedVersionConstraintsProcessorFactory extends UpdateRequestPro
 
     Object tmp = args.remove("versionField");
     if (null == tmp) {
-      throw new SolrException(SERVER_ERROR, 
-                              "'versionField' must be configured");
+      throw new SolrException(SERVER_ERROR,
+          "'versionField' must be configured");
     }
     if (! (tmp instanceof String) ) {
-      throw new SolrException(SERVER_ERROR, 
-                              "'versionField' must be configured as a <str>");
+      throw new SolrException(SERVER_ERROR,
+          "'versionField' must be configured as a <str>");
     }
-    versionField = tmp.toString();
+    versionFields = StrUtils.splitSmart((String)tmp, ',');
 
     // optional
     tmp = args.remove("deleteVersionParam");
     if (null != tmp) {
       if (! (tmp instanceof String) ) {
-        throw new SolrException(SERVER_ERROR, 
-                                "'deleteVersionParam' must be configured as a <str>");
+        throw new SolrException(SERVER_ERROR,
+            "'deleteVersionParam' must be configured as a <str>");
       }
-      deleteVersionParamName = tmp.toString();
+      deleteVersionParamNames = StrUtils.splitSmart((String)tmp, ',');
+    }
+
+    if (deleteVersionParamNames.size() > 0 && deleteVersionParamNames.size() != versionFields.size()) {
+      throw new SolrException(SERVER_ERROR, "The number of 'deleteVersionParam' params " +
+          "must either be 0 or equal to the number of 'versionField' fields");
     }
 
     // optional - defaults to false
@@ -130,18 +127,18 @@ public class DocBasedVersionConstraintsProcessorFactory extends UpdateRequestPro
         throw new SolrException(SERVER_ERROR, 
                                 "'ignoreOldUpdates' must be configured as a <bool>");
       }
-      ignoreOldUpdates = ((Boolean)tmp).booleanValue();
+      ignoreOldUpdates = (Boolean) tmp;
     }
     super.init(args);
   }
-  
 
+  @Override
   public UpdateRequestProcessor getInstance(SolrQueryRequest req, 
                                             SolrQueryResponse rsp, 
                                             UpdateRequestProcessor next ) {
-    return new DocBasedVersionConstraintsProcessor(versionField, 
+    return new DocBasedVersionConstraintsProcessor(versionFields,
                                                    ignoreOldUpdates,
-                                                   deleteVersionParamName,
+                                                   deleteVersionParamNames,
                                                    useFieldCache,
                                                    req, rsp, next);
   }
@@ -159,341 +156,23 @@ public class DocBasedVersionConstraintsProcessorFactory extends UpdateRequestPro
           "schema must have uniqueKey defined.");
     }
 
-    SchemaField userVersionField = core.getLatestSchema().getField(versionField);
-    if (userVersionField == null || !userVersionField.stored() || userVersionField.multiValued()) {
-      throw new SolrException(SERVER_ERROR,
-          "field " + versionField + " must be defined in schema, be stored, and be single valued.");
-    }
-
-    try {
-      ValueSource vs = userVersionField.getType().getValueSource(userVersionField, null);
-      useFieldCache = true;
-    } catch (Exception e) {
-      log.warn("Can't use fieldcache/valuesource: " + e.getMessage());
-    }
-  }
-
-
-
-  private static class DocBasedVersionConstraintsProcessor
-    extends UpdateRequestProcessor {
-
-    private final String versionFieldName;
-    private final SchemaField userVersionField;
-    private final SchemaField solrVersionField;
-    private final boolean ignoreOldUpdates;
-    private final String deleteVersionParamName;
-    private final SolrCore core;
-
-    private long oldSolrVersion;  // current _version_ of the doc in the index/update log
-    private DistributedUpdateProcessor distribProc;  // the distributed update processor following us
-    private DistributedUpdateProcessor.DistribPhase phase;
-    private boolean useFieldCache;
-
-    public DocBasedVersionConstraintsProcessor(String versionField,
-                                               boolean ignoreOldUpdates,
-                                               String deleteVersionParamName,
-                                               boolean useFieldCache,
-                                               SolrQueryRequest req, 
-                                               SolrQueryResponse rsp, 
-                                               UpdateRequestProcessor next ) {
-      super(next);
-      this.ignoreOldUpdates = ignoreOldUpdates;
-      this.deleteVersionParamName = deleteVersionParamName;
-      this.core = req.getCore();
-      this.versionFieldName = versionField;
-      this.userVersionField = core.getLatestSchema().getField(versionField);
-      this.solrVersionField = core.getLatestSchema().getField(CommonParams.VERSION_FIELD);
-      this.useFieldCache = useFieldCache;
-
-      for (UpdateRequestProcessor proc = next ;proc != null; proc = proc.next) {
-        if (proc instanceof DistributedUpdateProcessor) {
-          distribProc = (DistributedUpdateProcessor)proc;
-          break;
-        }
-      }
-
-      if (distribProc == null) {
-        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "DistributedUpdateProcessor must follow DocBasedVersionConstraintsProcessor");
-      }
-
-      phase = DistributedUpdateProcessor.DistribPhase.parseParam(req.getParams().get(DISTRIB_UPDATE_PARAM));
-
-    }
-    
-    /**
-     * Inspects a raw field value (which may come from a doc in the index, or a 
-     * doc in the UpdateLog that still has String values, or a String sent by 
-     * the user as a param) and if it is a String, asks the versionField FieldType 
-     * to convert it to an Object suitable for comparison.
-     */
-    private Object convertFieldValueUsingType(SchemaField sf, final Object rawValue) {
-      if (rawValue instanceof CharSequence) {
-        // in theory, the FieldType might still be CharSequence based,
-        // but in that case trust it to do an identity conversion...
-        FieldType fieldType = userVersionField.getType();
-        BytesRefBuilder term = new BytesRefBuilder();
-        fieldType.readableToIndexed((CharSequence)rawValue, term);
-        return fieldType.toObject(userVersionField, term.get());
-      }
-      // else...
-      return rawValue;
-    }
-
-
-    /**
-     * Returns true if the specified new version value is greater the the one
-     * already known to exist for the document, or the document does not already
-     * exist.
-     * Returns false if the specified new version is not high enough but the
-     * processor has been configured with ignoreOldUpdates=true
-     * Throws a SolrException if the version is not high enough and
-     * ignoreOldUpdates=false
-     */
-    private boolean isVersionNewEnough(BytesRef indexedDocId,
-                                       Object newUserVersion) throws IOException {
-      assert null != indexedDocId;
-      assert null != newUserVersion;
-
-      oldSolrVersion = -1;
-      // log.info("!!!!!!!!! isVersionNewEnough being called for " + indexedDocId.utf8ToString() + " newVersion=" + newUserVersion);
-      newUserVersion = convertFieldValueUsingType(userVersionField, newUserVersion);
-      Object oldUserVersion = null;
-      SolrInputDocument oldDoc = null;
-
-      if (useFieldCache) {
-        oldDoc = RealTimeGetComponent.getInputDocumentFromTlog(core, indexedDocId, null, null, true);
-        if (oldDoc == RealTimeGetComponent.DELETED) {
-          return true;
-        }
-        if (oldDoc == null) {
-          // need to look up in index now...
-          RefCounted<SolrIndexSearcher> newestSearcher = core.getRealtimeSearcher();
-          try {
-            SolrIndexSearcher searcher = newestSearcher.get();
-            long lookup = searcher.lookupId(indexedDocId);
-            if (lookup < 0) {
-              // doc not in index either...
-              return true;
-            }
-
-            ValueSource vs = solrVersionField.getType().getValueSource(solrVersionField, null);
-            Map context = ValueSource.newContext(searcher);
-            vs.createWeight(context, searcher);
-            FunctionValues fv = vs.getValues(context, searcher.getTopReaderContext().leaves().get((int)(lookup>>32)));
-            oldSolrVersion = fv.longVal((int)lookup);
-
-            vs = userVersionField.getType().getValueSource(userVersionField, null);
-            context = ValueSource.newContext(searcher);
-            vs.createWeight(context, searcher);
-            fv = vs.getValues(context, searcher.getTopReaderContext().leaves().get((int)(lookup>>32)));
-            oldUserVersion = fv.objectVal((int)lookup);
-
-          } catch (IOException e) {
-            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error reading version from index", e);
-          } finally {
-            if (newestSearcher != null) {
-              newestSearcher.decref();
-            }
-          }
-        }
-      } else {
-        // stored fields only...
-
-        oldDoc = RealTimeGetComponent.getInputDocument(core, indexedDocId);
-
-        if (null == oldDoc) {
-          // log.info("VERSION no doc found, returning true");
-          return true;
-        }
-      }
-
-
-      if (oldDoc != null) {
-        oldUserVersion = oldDoc.getFieldValue(versionFieldName);
-        // Make the FieldType resolve any conversion we need.
-        oldUserVersion = convertFieldValueUsingType(userVersionField, oldUserVersion);
-
-        Object o = oldDoc.getFieldValue(solrVersionField.getName());
-        if (o == null) {
-          throw new SolrException(SERVER_ERROR, "No _version_ for document "+ oldDoc);
-        }
-        oldSolrVersion = o instanceof Number ? ((Number) o).longValue() : Long.parseLong(o.toString());
-      }
-
-      // log.info("VERSION old=" + oldUserVersion + " new=" +newUserVersion );
-
-      if ( null == oldUserVersion) {
-        // could happen if they turn this feature on after building an index
-        // w/o the versionField
+    useFieldCache = true;
+    for (String versionField : versionFields) {
+      SchemaField userVersionField = core.getLatestSchema().getField(versionField);
+      if (userVersionField == null || !userVersionField.stored() || userVersionField.multiValued()) {
         throw new SolrException(SERVER_ERROR,
-            "Doc exists in index, but has null versionField: "
-                + versionFieldName);
+            "field " + versionField + " must be defined in schema, be stored, and be single valued.");
       }
-
-
-      if (! (oldUserVersion instanceof Comparable && newUserVersion instanceof Comparable) ) {
-        throw new SolrException(BAD_REQUEST,
-            "old version and new version are not comparable: " +
-                oldUserVersion.getClass()+" vs "+newUserVersion.getClass());
-      }
-
-      try {
-        if (0 < ((Comparable)newUserVersion).compareTo((Comparable) oldUserVersion)) {
-          // log.info("VERSION returning true (proceed with update)" );
-          return true;
-        }
-        if (ignoreOldUpdates) {
-          if (log.isDebugEnabled()) {
-            log.debug("Dropping update since user version is not high enough: " + newUserVersion + "; old user version=" + oldUserVersion);
-          }
-          // log.info("VERSION returning false (dropping update)" );
-          return false;
-        } else {
-          // log.info("VERSION will throw conflict" );
-          throw new SolrException(CONFLICT,
-              "user version is not high enough: " + newUserVersion);
-        }
-      } catch (ClassCastException e) {
-        throw new SolrException(BAD_REQUEST,
-            "old version and new version are not comparable: " +
-                oldUserVersion.getClass()+" vs "+newUserVersion.getClass() +
-                ": " + e.getMessage(), e);
-
-      }
-    }
-
-
-
-    public boolean isLeader(UpdateCommand cmd) {
-      if ((cmd.getFlags() & (UpdateCommand.REPLAY | UpdateCommand.PEER_SYNC)) != 0) {
-        return false;
-      }
-      if (phase == DistributedUpdateProcessor.DistribPhase.FROMLEADER) {
-        return false;
-      }
-      // if phase==TOLEADER, we can't just assume we are the leader... let the normal logic check.
-      boolean x = distribProc.isLeader(cmd);
-      // log.info("VERSION: checking if we are leader:" + x);
-      return x;
-    }
-
-    public void processAdd(AddUpdateCommand cmd) throws IOException {
-      if (!isLeader(cmd)) {
-        super.processAdd(cmd);
-        return;
-      }
-
-      final SolrInputDocument newDoc = cmd.getSolrInputDocument();
-
-      Object newVersion = newDoc.getFieldValue(versionFieldName);
-      if ( null == newVersion ) {
-        throw new SolrException(BAD_REQUEST, "Doc does not have versionField: " + versionFieldName);
-      }
-
-      for (int i=0; ;i++) {
-        // Log a warning every 256 retries.... even a few retries should normally be very unusual.
-        if ((i&0xff) == 0xff) {
-          log.warn("Unusual number of optimistic concurrency retries: retries=" + i + " cmd=" + cmd);
-        }
-
-        if (!isVersionNewEnough(cmd.getIndexedId(), newVersion)) {
-          // drop older update
-          return;
-        }
-
+      if (useFieldCache) {
         try {
-          cmd.setVersion(oldSolrVersion);  // use optimistic concurrency to ensure that the doc has not changed in the meantime
-          super.processAdd(cmd);
-          return;
-        } catch (SolrException e) {
-          if (e.code() == 409) {
-            // log.info ("##################### CONFLICT ADDING newDoc=" + newDoc + " newVersion=" + newVersion );
-            continue;  // if a version conflict, retry
-          }
-          throw e;  // rethrow
+          userVersionField.getType().getValueSource(userVersionField, null);
+        } catch (Exception e) {
+          useFieldCache = false;
+          log.warn("Can't use fieldcache/valuesource: " + e.getMessage());
         }
-
       }
     }
+  }
 
-    public void processDelete(DeleteUpdateCommand cmd) throws IOException {
-      if (null == deleteVersionParamName) {
-        // not suppose to look at deletes at all
-        super.processDelete(cmd);
-        return;
-      }
-
-      if ( ! cmd.isDeleteById() ) {
-        // nothing to do
-        super.processDelete(cmd);
-        return;
-      }
-
-      String deleteParamValue = cmd.getReq().getParams().get(deleteVersionParamName);
-      if (null == deleteParamValue) {
-        throw new SolrException(BAD_REQUEST,
-            "Delete by ID must specify doc version param: " +
-                deleteVersionParamName);
-      }
-
-
-      if (!isLeader(cmd)) {
-        // transform delete to add earlier rather than later
-
-        SolrInputDocument newDoc = new SolrInputDocument();
-        newDoc.setField(core.getLatestSchema().getUniqueKeyField().getName(),
-            cmd.getId());
-        newDoc.setField(versionFieldName, deleteParamValue);
-
-        AddUpdateCommand newCmd = new AddUpdateCommand(cmd.getReq());
-        newCmd.solrDoc = newDoc;
-        newCmd.commitWithin = cmd.commitWithin;
-        super.processAdd(newCmd);
-        return;
-      }
-
-
-      for (int i=0; ;i++) {
-        // Log a warning every 256 retries.... even a few retries should normally be very unusual.
-        if ((i&0xff) == 0xff) {
-          log.warn("Unusual number of optimistic concurrency retries: retries=" + i + " cmd=" + cmd);
-        }
-
-        if (!isVersionNewEnough(cmd.getIndexedId(), deleteParamValue)) {
-          // drop this older update
-          return;
-        }
-
-        // :TODO: should this logic be split and driven by two params?
-        //   - deleteVersionParam to do a version check
-        //   - some new boolean param to determine if a stub document gets added in place?
-        try {
-          // drop the delete, and instead propagate an AddDoc that
-          // replaces the doc with a new "empty" one that records the deleted version
-
-          SolrInputDocument newDoc = new SolrInputDocument();
-          newDoc.setField(core.getLatestSchema().getUniqueKeyField().getName(),
-              cmd.getId());
-          newDoc.setField(versionFieldName, deleteParamValue);
-
-          AddUpdateCommand newCmd = new AddUpdateCommand(cmd.getReq());
-          newCmd.solrDoc = newDoc;
-          newCmd.commitWithin = cmd.commitWithin;
-
-          newCmd.setVersion(oldSolrVersion);  // use optimistic concurrency to ensure that the doc has not changed in the meantime
-          super.processAdd(newCmd);
-          return;
-        } catch (SolrException e) {
-          if (e.code() == 409) {
-            continue;  // if a version conflict, retry
-          }
-          throw e;  // rethrow
-        }
-
-      }
-    }
 
-  } // end inner class
-  
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3d5f2f24/solr/core/src/test-files/solr/collection1/conf/solrconfig-externalversionconstraint.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-externalversionconstraint.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-externalversionconstraint.xml
index 3a0adbe..faa0ee0 100644
--- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-externalversionconstraint.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-externalversionconstraint.xml
@@ -44,11 +44,11 @@
 
   <updateRequestProcessorChain name="external-version-constraint" default="true">
     <!-- this chain uses the processor using the "deleteVersionParam" option
-         so that deleteById requests are translated into updates to preserve the 
-         (logically) deleted document in the index with a record of its deleted 
+         so that deleteById requests are translated into updates to preserve the
+         (logically) deleted document in the index with a record of its deleted
          version.
-         
-         It also demonstrates how to mix in TimestampUpdateProcessorFactory and 
+
+         It also demonstrates how to mix in TimestampUpdateProcessorFactory and
          DefaultValueUpdateProcessorFactory to ensure these logically deleted
          documents are kept out of searches, but can be cleaned up periodically
          after some amount of time has elapsed.
@@ -60,7 +60,7 @@
       <bool name="value">true</bool>
     </processor>
 
-    <!-- process the external version constraint, ignoring any updates that 
+    <!-- process the external version constraint, ignoring any updates that
          don't satisfy the constraint -->
     <processor class="solr.DocBasedVersionConstraintsProcessorFactory">
       <bool name="ignoreOldUpdates">true</bool>
@@ -69,7 +69,7 @@
     </processor>
 
     <processor class="solr.DefaultValueUpdateProcessorFactory">
-      <!-- any doc that makes it this here w/o a live value is a logically 
+      <!-- any doc that makes it this here w/o a live value is a logically
            deleted doc generated by the previous processor in place of deleteById
       -->
       <str name="fieldName">live_b</str>
@@ -84,12 +84,12 @@
 
   <updateRequestProcessorChain name="external-version-float">
     <!-- this chain uses the processor with float based versionField just
-         to sanity check that there are no hardcoded assumptions about the 
+         to sanity check that there are no hardcoded assumptions about the
          field type used that could byte us in the ass.
     -->
     <processor class="solr.DocBasedVersionConstraintsProcessorFactory">
-      <!-- process the external version constraint, ignoring any updates that 
-           don't satisfy the constraint 
+      <!-- process the external version constraint, ignoring any updates that
+           don't satisfy the constraint
       -->
       <bool name="ignoreOldUpdates">true</bool>
       <str name="versionField">my_version_f</str>
@@ -109,6 +109,20 @@
     <processor class="solr.RunUpdateProcessorFactory" />
   </updateRequestProcessorChain>
 
+  <updateRequestProcessorChain name="external-version-failhard-multiple">
+    <!-- Uses the default behavior of failing with a 409 version conflict
+         when the external version is too low.
+
+         If my_version_l is equal to the previous, then checks my_version_f.
+    -->
+    <processor class="solr.DocBasedVersionConstraintsProcessorFactory">
+      <str name="versionField">my_version_l,my_version_f</str>
+      <str name="deleteVersionParam">del_version,del_version_2</str>
+    </processor>
+    <processor class="solr.RunUpdateProcessorFactory" />
+  </updateRequestProcessorChain>
+
+
   <requestHandler name="/select" class="solr.SearchHandler">
   </requestHandler>
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3d5f2f24/solr/core/src/test/org/apache/solr/update/TestDocBasedVersionConstraints.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/update/TestDocBasedVersionConstraints.java b/solr/core/src/test/org/apache/solr/update/TestDocBasedVersionConstraints.java
index 10a07d9..20d64cf 100644
--- a/solr/core/src/test/org/apache/solr/update/TestDocBasedVersionConstraints.java
+++ b/solr/core/src/test/org/apache/solr/update/TestDocBasedVersionConstraints.java
@@ -16,6 +16,12 @@
  */
 package org.apache.solr.update;
 
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
 import org.apache.lucene.util.TestUtil;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.SolrException;
@@ -24,12 +30,6 @@ import org.apache.solr.util.DefaultSolrThreadFactory;
 import org.junit.Before;
 import org.junit.BeforeClass;
 
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-
 public class TestDocBasedVersionConstraints extends SolrTestCaseJ4 {
 
   @BeforeClass
@@ -288,6 +288,88 @@ public class TestDocBasedVersionConstraints extends SolrTestCaseJ4 {
              , "=={'doc':{'my_version_l':1010}}");
   }
 
+  // Test multiple version fields: an update must supply a greater value for at least one of my_version_l / my_version_f
+  public void testMultipleVersions() throws Exception {
+    updateJ(jsonAdd(sdoc("id", "aaa", "name", "a1", "my_version_l", "1001", "my_version_f", "1.0")),
+      params("update.chain","external-version-failhard-multiple"));
+    assertU(commit());
+    // Every combination of lower-or-equal versions should fail; only a greater my_version_l or my_version_f succeeds.
+    try {
+      updateJ(jsonAdd(sdoc("id", "aaa", "name", "X1", "my_version_l", "1000", "my_version_f", "1.0")),
+          params("update.chain","external-version-failhard-multiple"));
+      fail("no 409");
+    } catch (SolrException ex) {
+      assertEquals(409, ex.code());
+    }
+    try {
+      updateJ(jsonAdd(sdoc("id", "aaa", "name", "X2", "my_version_l", "1001", "my_version_f", "0.9")),
+          params("update.chain","external-version-failhard-multiple"));
+      fail("no 409");
+    } catch (SolrException ex) {
+      assertEquals(409, ex.code());
+    }
+    // Also fails on the exact same version
+    try {
+      updateJ(jsonAdd(sdoc("id", "aaa", "name", "X3", "my_version_l", "1001", "my_version_f", "1.0")),
+          params("update.chain","external-version-failhard-multiple"));
+      fail("no 409");
+    } catch (SolrException ex) {
+      assertEquals(409, ex.code());
+    }
+    //Verify we are still unchanged
+    assertU(commit());
+    assertJQ(req("q","+id:aaa +name:a1"), "/response/numFound==1");
+
+    // update version 1
+    updateJ(jsonAdd(sdoc("id", "aaa", "name", "Y1", "my_version_l", "2001", "my_version_f", "1.0")),
+        params("update.chain","external-version-failhard-multiple"));
+    assertU(commit());
+    assertJQ(req("q","+id:aaa +name:Y1"), "/response/numFound==1");
+
+    // update version 2
+    updateJ(jsonAdd(sdoc("id", "aaa", "name", "Y2", "my_version_l", "2001", "my_version_f", "2.0")),
+        params("update.chain","external-version-failhard-multiple"));
+    assertU(commit());
+    assertJQ(req("q","+id:aaa +name:Y2"), "/response/numFound==1");
+  }
+
+  public void testMultipleVersionDeletes() throws Exception {
+    updateJ(jsonAdd(sdoc("id", "aaa", "name", "a1", "my_version_l", "1001", "my_version_f", "1.0")),
+        params("update.chain","external-version-failhard-multiple"));
+    assertU(commit());
+    try {
+      deleteAndGetVersion("aaa", params("del_version", "1000", "del_version_2", "1.0",
+          "update.chain","external-version-failhard-multiple"));
+      fail("no 409");
+    } catch (SolrException ex) {
+      assertEquals(409, ex.code());
+    }
+    try {
+      deleteAndGetVersion("aaa", params("del_version", "1001", "del_version_2", "0.9",
+          "update.chain","external-version-failhard-multiple"));
+      fail("no 409");
+    } catch (SolrException ex) {
+      assertEquals(409, ex.code());
+    }
+    // And just verify if we pass version 1, we still error if version 2 isn't found.
+    try {
+      deleteAndGetVersion("aaa", params("del_version", "1001",
+          "update.chain","external-version-failhard-multiple"));
+      fail("no 400");
+    } catch (SolrException ex) {
+      assertEquals(400, ex.code());
+    }
+    //Verify we are still unchanged
+    assertU(commit());
+    assertJQ(req("q","+id:aaa +name:a1"), "/response/numFound==1");
+
+    // And verify the successful delete case.
+    deleteAndGetVersion("aaa", params("del_version", "1001", "del_version_2", "2.0",
+        "update.chain","external-version-failhard-multiple"));
+    assertU(commit());
+    assertJQ(req("q","+id:aaa +name:a1"), "/response/numFound==0"); //Delete allowed
+  }
+
 
   /** 
    * Proof of concept test demonstrating how to manage and periodically cleanup

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3d5f2f24/solr/solr-ref-guide/src/updating-parts-of-documents.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/updating-parts-of-documents.adoc b/solr/solr-ref-guide/src/updating-parts-of-documents.adoc
index 949b60b..2a3d0b5 100644
--- a/solr/solr-ref-guide/src/updating-parts-of-documents.adoc
+++ b/solr/solr-ref-guide/src/updating-parts-of-documents.adoc
@@ -266,7 +266,9 @@ For more information, please also see Yonik Seeley's presentation on https://www
 
 == Document Centric Versioning Constraints
 
-Optimistic Concurrency is extremely powerful, and works very efficiently because it uses an internally assigned, globally unique values for the `\_version_` field. However, in some situations users may want to configure their own document specific version field, where the version values are assigned on a per-document basis by an external system, and have Solr reject updates that attempt to replace a document with an "older" version. In situations like this the {solr-javadocs}/solr-core/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessorFactory.html[`DocBasedVersionConstraintsProcessorFactory`] can be useful.
+Optimistic Concurrency is extremely powerful, and works very efficiently because it uses internally assigned, globally unique values for the `\_version_` field.
+However, in some situations users may want to configure their own document-specific version field, where the version values are assigned on a per-document basis by an external system, and have Solr reject updates that attempt to replace a document with an "older" version.
+In situations like this the {solr-javadocs}/solr-core/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessorFactory.html[`DocBasedVersionConstraintsProcessorFactory`] can be useful.
 
 The basic usage of `DocBasedVersionConstraintsProcessorFactory` is to configure it in `solrconfig.xml` as part of the <<update-request-processors.adoc#update-request-processor-configuration,UpdateRequestProcessorChain>> and specify the name of your custom `versionField` in your schema that should be checked when validating updates:
 
@@ -277,6 +279,7 @@ The basic usage of `DocBasedVersionConstraintsProcessorFactory` is to configure
 </processor>
 ----
 
+Note that `versionField` is a comma-delimited list of fields to check for version numbers.
 Once configured, this update processor will reject (HTTP error code 409) any attempt to update an existing document where the value of the `my_version_l` field in the "new" document is not greater than the value of that field in the existing document.
 
 .versionField vs `\_version_`
@@ -288,6 +291,9 @@ The `\_version_` field used by Solr for its normal optimistic concurrency also h
 `DocBasedVersionConstraintsProcessorFactory` supports two additional configuration params which are optional:
 
 * `ignoreOldUpdates` - A boolean option which defaults to `false`. If set to `true`, then instead of rejecting updates where the `versionField` is too low, the update will be silently ignored (and a status 200 returned to the client).
-* `deleteVersionParam` - A String parameter that can be specified to indicate that this processor should also inspect Delete By Id commands. The value of this configuration option should be the name of a request parameter that the processor will now consider mandatory for all attempts to Delete By Id, and must be be used by clients to specify a value for the `versionField` which is greater then the existing value of the document to be deleted. When using this request param, any Delete By Id command with a high enough document version number to succeed will be internally converted into an Add Document command that replaces the existing document with a new one which is empty except for the Unique Key and `versionField` to keeping a record of the deleted version so future Add Document commands will fail if their "new" version is not high enough.
+* `deleteVersionParam` - A String parameter that can be specified to indicate that this processor should also inspect Delete By Id commands.
+The value of this configuration option should be the name of a request parameter that the processor will now consider mandatory for all attempts to Delete By Id, and must be used by clients to specify a value for the `versionField` which is greater than the existing value of the document to be deleted.
+When using this request param, any Delete By Id command with a high enough document version number to succeed will be internally converted into an Add Document command that replaces the existing document with a new one which is empty except for the Unique Key and `versionField`, to keep a record of the deleted version so that future Add Document commands will fail if their "new" version is not high enough.
+If `versionField` is specified as a list, then this parameter too must be specified as a comma-delimited list of the same size so that each parameter corresponds to a field.
 
 Please consult the {solr-javadocs}/solr-core/org/apache/solr/update/processor/DocBasedVersionConstraintsProcessorFactory.html[DocBasedVersionConstraintsProcessorFactory javadocs] and https://git1-us-west.apache.org/repos/asf?p=lucene-solr.git;a=blob;f=solr/core/src/test-files/solr/collection1/conf/solrconfig-externalversionconstraint.xml;hb=HEAD[test solrconfig.xml file] for additional information and example usages.
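
As a concrete illustration of the multi-field setup described above, the configuration exercised by the new tests would look roughly like this (a sketch; the chain, field, and parameter names are taken from the test configuration in this change set, and the surrounding chain is abbreviated):

[source,xml]
----
<updateRequestProcessorChain name="external-version-failhard-multiple">
  <processor class="solr.DocBasedVersionConstraintsProcessorFactory">
    <!-- two external version fields, checked together -->
    <str name="versionField">my_version_l,my_version_f</str>
    <!-- one delete-by-id request param per version field, in the same order -->
    <str name="deleteVersionParam">del_version,del_version_2</str>
    <bool name="ignoreOldUpdates">false</bool>
  </processor>
  <processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
----

A Delete By Id request against this chain would then have to supply both `del_version` and `del_version_2`, which is exactly what the new `testMultipleVersionDeletes` test does.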


[41/46] lucene-solr:jira/solr-11833: SOLR-11731: revert to 7 decimal places so we can maintain the round-trip property. Add test that we round-trip random numbers at 7 decimal places. Would fail at 8.

Posted by ab...@apache.org.
SOLR-11731: revert to 7 decimal places so we can maintain the round-trip property.
Add test that we round-trip random numbers at 7 decimal places.  Would fail at 8.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/dad2d107
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/dad2d107
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/dad2d107

Branch: refs/heads/jira/solr-11833
Commit: dad2d1076db632535c33fa118eb851ad7d0e2537
Parents: 93f9a65
Author: David Smiley <ds...@apache.org>
Authored: Sat Apr 14 10:51:51 2018 -0400
Committer: David Smiley <ds...@apache.org>
Committed: Sat Apr 14 10:51:51 2018 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                     |  2 +-
 .../apache/solr/schema/LatLonPointSpatialField.java  |  4 ++--
 .../org/apache/solr/search/TestSolr4Spatial2.java    | 15 ++++++++++++---
 3 files changed, 15 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dad2d107/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index b39fd6f..08c246f 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -168,7 +168,7 @@ Optimizations
   a few segments diverge. (Ishan Chattopadhyaya, Shaun Sabo, John Gallagher)
 
 * SOLR-11731: LatLonPointSpatialField can now decode points from docValues when stored=false docValues=true,
-  albeit with maximum precision of 1.04cm (Karthik Ramachandran, David Smiley)
+  albeit with maximum precision of 1.37cm (Karthik Ramachandran, David Smiley)
 
 * SOLR-11891: DocStreamer now respects the ReturnFields when populating a SolrDocument, reducing the
   number of unnecessary fields a ResponseWriter will see if documentCache is used (wei wang, hossman)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dad2d107/solr/core/src/java/org/apache/solr/schema/LatLonPointSpatialField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/LatLonPointSpatialField.java b/solr/core/src/java/org/apache/solr/schema/LatLonPointSpatialField.java
index dd3f586..dfdd074 100644
--- a/solr/core/src/java/org/apache/solr/schema/LatLonPointSpatialField.java
+++ b/solr/core/src/java/org/apache/solr/schema/LatLonPointSpatialField.java
@@ -87,10 +87,10 @@ public class LatLonPointSpatialField extends AbstractSpatialFieldType implements
   public static String decodeDocValueToString(long value) {
     final double latDouble = GeoEncodingUtils.decodeLatitude((int) (value >> 32));
     final double lonDouble = GeoEncodingUtils.decodeLongitude((int) (value & 0xFFFFFFFFL));
-    // This # decimal places maximizes our available precision to just over a centimeter; we have a test for it.
+    // This # of decimal places gets us close to our available precision, about 1.37cm; we have a test for it.
     // CEILING round-trips (decode then re-encode then decode to get identical results). Others did not. It also
     //   reverses the "floor" that occurred when we encoded.
-    final int DECIMAL_PLACES = 8;
+    final int DECIMAL_PLACES = 7;
     final RoundingMode ROUND_MODE = CEILING;
     BigDecimal latitudeDecoded = BigDecimal.valueOf(latDouble).setScale(DECIMAL_PLACES, ROUND_MODE);
     BigDecimal longitudeDecoded = BigDecimal.valueOf(lonDouble).setScale(DECIMAL_PLACES, ROUND_MODE);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dad2d107/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
index df1c8cd..e5dc691 100644
--- a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
+++ b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
@@ -16,6 +16,7 @@
  */
 package org.apache.solr.search;
 
+import java.math.BigDecimal;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -56,6 +57,7 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
   public void setUp() throws Exception {
     super.setUp();
     clearIndex();
+    RetrievalCombo.idCounter = 0;
   }
 
   @Test
@@ -164,18 +166,25 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
         = SpatialContext.GEO.calcDistance(absErrorPt, 0,0) * DistanceUtils.DEG_TO_KM * 1000.0 * 100.0;
     assertEquals(1.0420371840922256, deltaCentimetersMax, 0.0);// just so that we see it in black & white in the test
 
-    assertTrue("deltaCm too high: " + deltaCentimeters, deltaCentimeters <= deltaCentimetersMax);
+    // max found by trial & error.  If we used 8 decimal places then we could get down to 1.04cm accuracy but then we
+    // lose the ability to round-trip -- 40 would become 39.99999997  (ugh).
+    assertTrue("deltaCm too high: " + deltaCentimeters, deltaCentimeters <= 1.37);
   }
 
   @Test
   public void testLatLonRetrieval() throws Exception {
-    final String ptHighPrecision = "40.2996543270,-74.0824956673";
+    final String ptHighPrecision =   "40.2996543270,-74.0824956673";
     final String ptLossOfPrecision = "40.2996544,-74.0824957"; // rounded version of the one above, losing precision
 
     // "_1" is single, "_N" is multiValued
     // "_dv" is docValues (otherwise not),  "_dvasst" is useDocValuesAsStored (otherwise not)
     // "_st" is stored" (otherwise not)
 
+    // a random point using the number of decimal places we support for round-tripping.
+    String randPointStr =
+        new BigDecimal(GeoTestUtil.nextLatitude()).setScale(7, BigDecimal.ROUND_HALF_UP).stripTrailingZeros().toPlainString() +
+        "," + new BigDecimal(GeoTestUtil.nextLongitude()).setScale(7, BigDecimal.ROUND_HALF_UP).stripTrailingZeros().toPlainString();
+
     List<RetrievalCombo> combos = Arrays.asList(
         new RetrievalCombo("llp_1_dv_st", ptHighPrecision),
         new RetrievalCombo("llp_N_dv_st", Arrays.asList("-40,40", "-45,45")),
@@ -184,7 +193,7 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
         new RetrievalCombo("llp_1_dv_dvasst", ptHighPrecision, ptLossOfPrecision),
         // this one comes back in a different order since it gets sorted low to high
         new RetrievalCombo("llp_N_dv_dvasst", Arrays.asList("-40,40", "-45,45"), Arrays.asList("-45,45", "-40,40")),
-        new RetrievalCombo("llp_N_dv_dvasst", Arrays.asList("-40,40")), // multiValued but 1 value
+        new RetrievalCombo("llp_N_dv_dvasst", Arrays.asList(randPointStr)), // multiValued but 1 value
         // edge cases.  (note we sorted it as Lucene will internally)
         new RetrievalCombo("llp_N_dv_dvasst", Arrays.asList(
             "-90,180", "-90,-180",

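For reference, the round-trip property the new test relies on can be sketched outside the test harness like this (assuming Lucene's `GeoEncodingUtils`; the standalone class and sample value below are illustrative only and not part of this change):

----
import java.math.BigDecimal;
import java.math.RoundingMode;

import org.apache.lucene.geo.GeoEncodingUtils;

public class RoundTripSketch {
  public static void main(String[] args) {
    double lat = 40.2996543270;                                  // arbitrary high-precision latitude
    int encoded = GeoEncodingUtils.encodeLatitude(lat);          // quantizes ("floors") to 32 bits
    double decoded = GeoEncodingUtils.decodeLatitude(encoded);   // slightly below the original value
    // Rounding up (CEILING) at 7 decimal places reverses the floor from encoding,
    // so re-encoding the rounded value should land on the same quantized cell.
    double rounded = BigDecimal.valueOf(decoded)
        .setScale(7, RoundingMode.CEILING).doubleValue();
    int reencoded = GeoEncodingUtils.encodeLatitude(rounded);
    System.out.println(encoded == reencoded);                    // expected: true at 7 places
  }
}
----

Per the in-code comment above, using 8 decimal places would improve precision to about 1.04cm but break this round-trip (40 would come back as 39.99999997), which is why the scale was reverted to 7.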

[37/46] lucene-solr:jira/solr-11833: LUCENE-8248: MergePolicyWrapper is renamed to FilterMergePolicy and now also overrides getMaxCFSSegmentSizeMB

Posted by ab...@apache.org.
LUCENE-8248: MergePolicyWrapper is renamed to FilterMergePolicy and now also overrides getMaxCFSSegmentSizeMB


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7c0387ad
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7c0387ad
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7c0387ad

Branch: refs/heads/jira/solr-11833
Commit: 7c0387ad3fa7985564350a0cd16694905e66619d
Parents: 487daab
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Apr 13 15:45:19 2018 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Apr 13 15:45:19 2018 -0400

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   3 +
 .../apache/lucene/index/FilterMergePolicy.java  | 106 +++++++++++++++++++
 .../org/apache/lucene/index/MergePolicy.java    |   2 +-
 .../apache/lucene/index/MergePolicyWrapper.java |  67 ++----------
 .../org/apache/lucene/index/NoMergePolicy.java  |  11 +-
 .../index/OneMergeWrappingMergePolicy.java      |   2 +-
 .../lucene/index/UpgradeIndexMergePolicy.java   |   2 +-
 .../index/TestDemoParallelLeafReader.java       |   2 +-
 .../lucene/index/TestFilterMergePolicy.java     |  37 +++++++
 .../apache/lucene/index/TestIndexWriter.java    |   2 +-
 .../lucene/index/TestIndexWriterOnDiskFull.java |   2 +-
 .../lucene/index/TestMergePolicyWrapper.java    |  37 -------
 .../apache/lucene/index/TestMultiFields.java    |   2 +-
 .../apache/lucene/index/ForceMergePolicy.java   |   2 +-
 .../apache/solr/index/SortingMergePolicy.java   |   4 +-
 .../org/apache/solr/util/RandomMergePolicy.java |   4 +-
 16 files changed, 172 insertions(+), 113 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c0387ad/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 58d234f..e559099 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -104,6 +104,9 @@ API Changes
   Instead use IndexSearcher.createWeight(), rewriting the query first.
   (Alan Woodward)
 
+* LUCENE-8248: MergePolicyWrapper is renamed to FilterMergePolicy and now
+  also overrides getMaxCFSSegmentSizeMB (Mike Sokolov via Mike McCandless)
+
 New Features
 
 * LUCENE-8200: Allow doc-values to be updated atomically together

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c0387ad/lucene/core/src/java/org/apache/lucene/index/FilterMergePolicy.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/FilterMergePolicy.java
new file mode 100644
index 0000000..d073b84
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/index/FilterMergePolicy.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.util.IOSupplier;
+
+/**
+ * A wrapper for {@link MergePolicy} instances.
+ *
+ * @lucene.experimental
+ */
+public class FilterMergePolicy extends MergePolicy {
+
+  /** The wrapped {@link MergePolicy}. */
+  protected final MergePolicy in;
+
+  /**
+   * Creates a new filter merge policy instance wrapping another.
+   *
+   * @param in the wrapped {@link MergePolicy}
+   */
+  public FilterMergePolicy(MergePolicy in) {
+    this.in = in;
+  }
+
+  @Override
+  public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer)
+      throws IOException {
+    return in.findMerges(mergeTrigger, segmentInfos, writer);
+  }
+
+  @Override
+  public MergeSpecification findForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount,
+      Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer) throws IOException {
+    return in.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, writer);
+  }
+
+  @Override
+  public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
+    return in.findForcedDeletesMerges(segmentInfos, writer);
+  }
+
+  @Override
+  public boolean useCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo, IndexWriter writer)
+      throws IOException {
+    return in.useCompoundFile(infos, mergedInfo, writer);
+  }
+
+  @Override
+  protected long size(SegmentCommitInfo info, IndexWriter writer) throws IOException {
+    return in.size(info, writer);
+  }
+
+  @Override
+  public double getNoCFSRatio() {
+    return in.getNoCFSRatio();
+  }
+
+  @Override
+  public final void setNoCFSRatio(double noCFSRatio) {
+    in.setNoCFSRatio(noCFSRatio);
+  }
+
+  @Override
+  public final void setMaxCFSSegmentSizeMB(double v) {
+    in.setMaxCFSSegmentSizeMB(v);
+  }
+
+  @Override
+  public final double getMaxCFSSegmentSizeMB() {
+    return in.getMaxCFSSegmentSizeMB();
+  }
+
+  @Override
+  public String toString() {
+    return getClass().getSimpleName() + "(" + in + ")";
+  }
+
+  @Override
+  public boolean keepFullyDeletedSegment(CodecReader reader) throws IOException {
+    return in.keepFullyDeletedSegment(reader);
+  }
+
+  @Override
+  public int numDeletesToMerge(SegmentCommitInfo info, int pendingDeleteCount,
+                               IOSupplier<CodecReader> readerSupplier) throws IOException {
+    return in.numDeletesToMerge(info, pendingDeleteCount, readerSupplier);
+  }
+}
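
For context, subclasses of the new FilterMergePolicy only need to override the behavior they change; everything else is forwarded to the wrapped policy. A minimal sketch (the subclass name is hypothetical and not part of this change):

----
import java.io.IOException;

import org.apache.lucene.index.FilterMergePolicy;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;

/** Delegates everything to the wrapped policy, but never writes compound files. */
public class NoCompoundFilesMergePolicy extends FilterMergePolicy {

  public NoCompoundFilesMergePolicy(MergePolicy in) {
    super(in);
  }

  @Override
  public boolean useCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo, IndexWriter writer)
      throws IOException {
    return false; // only this decision is customized; findMerges etc. still delegate to "in"
  }
}
----

This is the same pattern the updated tests use inline, e.g. the anonymous FilterMergePolicy in TestIndexWriter that overrides keepFullyDeletedSegment.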

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c0387ad/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
index 8212c4a..093fe5a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
@@ -590,7 +590,7 @@ public abstract class MergePolicy {
   }
 
   /** Returns the largest size allowed for a compound file segment */
-  public final double getMaxCFSSegmentSizeMB() {
+  public double getMaxCFSSegmentSizeMB() {
     return maxCFSSegmentSize/1024/1024.;
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c0387ad/lucene/core/src/java/org/apache/lucene/index/MergePolicyWrapper.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicyWrapper.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicyWrapper.java
index c7124ad..4731e5e 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MergePolicyWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MergePolicyWrapper.java
@@ -17,80 +17,25 @@
 package org.apache.lucene.index;
 
 import java.io.IOException;
-import java.util.Map;
 
 import org.apache.lucene.util.IOSupplier;
 
 /**
  * A wrapper for {@link MergePolicy} instances.
- *
+ * @deprecated replace with FilterMergePolicy
  * @lucene.experimental
  */
-public class MergePolicyWrapper extends MergePolicy {
-
-  /** The wrapped {@link MergePolicy}. */
-  protected final MergePolicy in;
+@Deprecated
+public class MergePolicyWrapper extends FilterMergePolicy {
+  // placeholder for backwards compatibility
 
   /**
-   * Creates a new merge policy instance.
+   * Creates a new merge policy instance wrapping another.
    *
    * @param in the wrapped {@link MergePolicy}
    */
   public MergePolicyWrapper(MergePolicy in) {
-    this.in = in;
-  }
-
-  @Override
-  public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer)
-      throws IOException {
-    return in.findMerges(mergeTrigger, segmentInfos, writer);
-  }
-
-  @Override
-  public MergeSpecification findForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount,
-      Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer) throws IOException {
-    return in.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, writer);
-  }
-
-  @Override
-  public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
-    return in.findForcedDeletesMerges(segmentInfos, writer);
-  }
-
-  @Override
-  public boolean useCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo, IndexWriter writer)
-      throws IOException {
-    return in.useCompoundFile(infos, mergedInfo, writer);
-  }
-
-  @Override
-  protected long size(SegmentCommitInfo info, IndexWriter writer) throws IOException {
-    return in.size(info, writer);
-  }
-
-  @Override
-  public double getNoCFSRatio() {
-    return in.getNoCFSRatio();
-  }
-
-  @Override
-  public final void setNoCFSRatio(double noCFSRatio) {
-    in.setNoCFSRatio(noCFSRatio);
-  }
-
-  @Override
-  public final void setMaxCFSSegmentSizeMB(double v) {
-    in.setMaxCFSSegmentSizeMB(v);
-  }
-
-  @Override
-  public String toString() {
-    return getClass().getSimpleName() + "(" + in + ")";
-  }
-
-  @Override
-  public boolean keepFullyDeletedSegment(CodecReader reader) throws IOException {
-    return in.keepFullyDeletedSegment(reader);
+    super(in);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c0387ad/lucene/core/src/java/org/apache/lucene/index/NoMergePolicy.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/NoMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/NoMergePolicy.java
index 08f900a..e1f1a54 100644
--- a/lucene/core/src/java/org/apache/lucene/index/NoMergePolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/index/NoMergePolicy.java
@@ -54,17 +54,22 @@ public final class NoMergePolicy extends MergePolicy {
   protected long size(SegmentCommitInfo info, IndexWriter writer) throws IOException {
     return Long.MAX_VALUE;
   }
-  
+
   @Override
   public double getNoCFSRatio() {
     return super.getNoCFSRatio();
   }
-  
+
+  @Override
+  public double getMaxCFSSegmentSizeMB() {
+    return super.getMaxCFSSegmentSizeMB();
+  }
+
   @Override
   public void setMaxCFSSegmentSizeMB(double v) {
     super.setMaxCFSSegmentSizeMB(v);
   }
-  
+
   @Override
   public void setNoCFSRatio(double noCFSRatio) {
     super.setNoCFSRatio(noCFSRatio);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c0387ad/lucene/core/src/java/org/apache/lucene/index/OneMergeWrappingMergePolicy.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/OneMergeWrappingMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/OneMergeWrappingMergePolicy.java
index 982e8c2..300c3fb 100644
--- a/lucene/core/src/java/org/apache/lucene/index/OneMergeWrappingMergePolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/index/OneMergeWrappingMergePolicy.java
@@ -26,7 +26,7 @@ import java.util.function.UnaryOperator;
  *
  * @lucene.experimental
  */
-public class OneMergeWrappingMergePolicy extends MergePolicyWrapper {
+public class OneMergeWrappingMergePolicy extends FilterMergePolicy {
 
   private final UnaryOperator<OneMerge> wrapOneMerge;
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c0387ad/lucene/core/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java
index 74cbc90..a8a0aed 100644
--- a/lucene/core/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java
@@ -48,7 +48,7 @@ import java.util.HashMap;
   * @lucene.experimental
   * @see IndexUpgrader
   */
-public class UpgradeIndexMergePolicy extends MergePolicyWrapper {
+public class UpgradeIndexMergePolicy extends FilterMergePolicy {
 
   /** Wrap the given {@link MergePolicy} and intercept forceMerge requests to
    * only upgrade segments written with previous Lucene versions. */

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c0387ad/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java b/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java
index ae06ae7..415dd90 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java
@@ -508,7 +508,7 @@ public class TestDemoParallelLeafReader extends LuceneTestCase {
     }
 
     /** Just replaces the sub-readers with parallel readers, so reindexed fields are merged into new segments. */
-    private class ReindexingMergePolicy extends MergePolicyWrapper {
+    private class ReindexingMergePolicy extends FilterMergePolicy {
 
       class ReindexingOneMerge extends OneMerge {
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c0387ad/lucene/core/src/test/org/apache/lucene/index/TestFilterMergePolicy.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFilterMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestFilterMergePolicy.java
new file mode 100644
index 0000000..4057d3c
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/index/TestFilterMergePolicy.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestFilterMergePolicy extends LuceneTestCase {
+
+  public void testMethodsOverridden() throws Exception {
+    for (Method m : MergePolicy.class.getDeclaredMethods()) {
+      if (Modifier.isFinal(m.getModifiers())) continue;
+      try {
+        FilterMergePolicy.class.getDeclaredMethod(m.getName(),  m.getParameterTypes());
+      } catch (NoSuchMethodException e) {
+        fail("MergePolicyWrapper needs to override '" + m + "'");
+      }
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c0387ad/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
index e45716d..12151e7 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
@@ -2222,7 +2222,7 @@ public class TestIndexWriter extends LuceneTestCase {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
     AtomicBoolean keepFullyDeletedSegments = new AtomicBoolean();
-    iwc.setMergePolicy(new MergePolicyWrapper(iwc.getMergePolicy()) {
+    iwc.setMergePolicy(new FilterMergePolicy(iwc.getMergePolicy()) {
       @Override
       public boolean keepFullyDeletedSegment(CodecReader reader) throws IOException {
         return keepFullyDeletedSegments.get();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c0387ad/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java
index d9e73a1..ce3c72c 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java
@@ -501,7 +501,7 @@ public class TestIndexWriterOnDiskFull extends LuceneTestCase {
         newIndexWriterConfig(new MockAnalyzer(random()))
           .setMergeScheduler(new SerialMergeScheduler())
           .setReaderPooling(true)
-          .setMergePolicy(new MergePolicyWrapper(newLogMergePolicy(2)) {
+          .setMergePolicy(new FilterMergePolicy(newLogMergePolicy(2)) {
             @Override
             public boolean keepFullyDeletedSegment(CodecReader reader) throws IOException {
               // we can do this because we add/delete/add (and dont merge to "nothing")

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c0387ad/lucene/core/src/test/org/apache/lucene/index/TestMergePolicyWrapper.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestMergePolicyWrapper.java b/lucene/core/src/test/org/apache/lucene/index/TestMergePolicyWrapper.java
deleted file mode 100644
index fa1b5d4..0000000
--- a/lucene/core/src/test/org/apache/lucene/index/TestMergePolicyWrapper.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.lang.reflect.Method;
-import java.lang.reflect.Modifier;
-
-import org.apache.lucene.util.LuceneTestCase;
-
-public class TestMergePolicyWrapper extends LuceneTestCase {
-
-  public void testMethodsOverridden() throws Exception {
-    for (Method m : MergePolicy.class.getDeclaredMethods()) {
-      if (Modifier.isFinal(m.getModifiers())) continue;
-      try {
-        MergePolicyWrapper.class.getDeclaredMethod(m.getName(),  m.getParameterTypes());
-      } catch (NoSuchMethodException e) {
-        fail("MergePolicyWrapper needs to override '" + m + "'");
-      }
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c0387ad/lucene/core/src/test/org/apache/lucene/index/TestMultiFields.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestMultiFields.java b/lucene/core/src/test/org/apache/lucene/index/TestMultiFields.java
index 6e0d643..3c09bbd 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestMultiFields.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestMultiFields.java
@@ -49,7 +49,7 @@ public class TestMultiFields extends LuceneTestCase {
       Directory dir = newDirectory();
 
       IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
-                                             .setMergePolicy(new MergePolicyWrapper(NoMergePolicy.INSTANCE) {
+                                             .setMergePolicy(new FilterMergePolicy(NoMergePolicy.INSTANCE) {
                                                @Override
                                                public boolean keepFullyDeletedSegment(CodecReader reader) {
                                                  // we can do this because we use NoMergePolicy (and dont merge to "nothing")

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c0387ad/lucene/test-framework/src/java/org/apache/lucene/index/ForceMergePolicy.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/ForceMergePolicy.java b/lucene/test-framework/src/java/org/apache/lucene/index/ForceMergePolicy.java
index 02382e8..45aaa02 100755
--- a/lucene/test-framework/src/java/org/apache/lucene/index/ForceMergePolicy.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/ForceMergePolicy.java
@@ -30,7 +30,7 @@ import org.apache.lucene.index.SegmentInfos;
  *
  * @lucene.experimental
  */
-public final class ForceMergePolicy extends MergePolicyWrapper {
+public final class ForceMergePolicy extends FilterMergePolicy {
 
   /** Create a new {@code ForceMergePolicy} around the given {@code MergePolicy} */
   public ForceMergePolicy(MergePolicy in) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c0387ad/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java b/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java
index 313b662..c3e5f5e 100644
--- a/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java
+++ b/solr/core/src/java/org/apache/solr/index/SortingMergePolicy.java
@@ -16,13 +16,13 @@
  */
 package org.apache.solr.index;
 
+import org.apache.lucene.index.FilterMergePolicy;
 import org.apache.lucene.index.MergePolicy;
-import org.apache.lucene.index.MergePolicyWrapper;
 import org.apache.lucene.search.Sort;
 
 // TODO: remove this and add indexSort specification directly to solrconfig.xml?  But for BWC, also accept SortingMergePolicy specification?
 
-public final class SortingMergePolicy extends MergePolicyWrapper {
+public final class SortingMergePolicy extends FilterMergePolicy {
 
   private final Sort sort;
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7c0387ad/solr/test-framework/src/java/org/apache/solr/util/RandomMergePolicy.java
----------------------------------------------------------------------
diff --git a/solr/test-framework/src/java/org/apache/solr/util/RandomMergePolicy.java b/solr/test-framework/src/java/org/apache/solr/util/RandomMergePolicy.java
index b556fce..b930a4f 100644
--- a/solr/test-framework/src/java/org/apache/solr/util/RandomMergePolicy.java
+++ b/solr/test-framework/src/java/org/apache/solr/util/RandomMergePolicy.java
@@ -18,7 +18,7 @@ package org.apache.solr.util;
 
 import java.lang.invoke.MethodHandles;
 import org.apache.lucene.index.MergePolicy;
-import org.apache.lucene.index.MergePolicyWrapper;
+import org.apache.lucene.index.FilterMergePolicy;
 import org.apache.lucene.util.LuceneTestCase;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -29,7 +29,7 @@ import org.slf4j.LoggerFactory;
  * Solr tests utilizing the Lucene randomized test framework can refer 
  * to this class in solrconfig.xml to get a fully randomized merge policy.
  */
-public class RandomMergePolicy extends MergePolicyWrapper {
+public class RandomMergePolicy extends FilterMergePolicy {
 
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
   


[02/46] lucene-solr:jira/solr-11833: Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr

Posted by ab...@apache.org.
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/5bd7b03e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/5bd7b03e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/5bd7b03e

Branch: refs/heads/jira/solr-11833
Commit: 5bd7b03e7191d4966590e96bcf9cdcbc4813a5cc
Parents: 0c71503 376f6c4
Author: Karl Wright <Da...@gmail.com>
Authored: Wed Apr 11 17:54:30 2018 -0400
Committer: Karl Wright <Da...@gmail.com>
Committed: Wed Apr 11 17:54:30 2018 -0400

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   9 +
 .../org/apache/lucene/index/IndexWriter.java    |  20 +
 .../org/apache/lucene/index/LogMergePolicy.java |   4 +-
 .../org/apache/lucene/index/MergePolicy.java    |  27 +-
 .../apache/lucene/index/MergePolicyWrapper.java |   7 +
 .../org/apache/lucene/index/NoMergePolicy.java  |   7 +
 .../apache/lucene/index/ReadersAndUpdates.java  |  91 ++-
 .../index/SoftDeletesRetentionMergePolicy.java  |  29 +-
 .../apache/lucene/index/TieredMergePolicy.java  |   2 +-
 .../org/apache/lucene/search/BooleanWeight.java |  35 +
 .../lucene/search/ConstantScoreQuery.java       |   5 +
 .../search/DisjunctionMatchesIterator.java      | 171 +++++
 .../lucene/search/DisjunctionMaxQuery.java      |  12 +
 .../lucene/search/DocValuesRewriteMethod.java   |  20 +-
 .../org/apache/lucene/search/FilterWeight.java  |   4 +
 .../lucene/search/IndexOrDocValuesQuery.java    |   6 +
 .../org/apache/lucene/search/LRUQueryCache.java |   5 +
 .../java/org/apache/lucene/search/Matches.java  | 146 +++++
 .../apache/lucene/search/MatchesIterator.java   |  79 +++
 .../MultiTermQueryConstantScoreWrapper.java     |  14 +-
 .../org/apache/lucene/search/SynonymQuery.java  |  13 +-
 .../apache/lucene/search/TermInSetQuery.java    |   9 +
 .../lucene/search/TermMatchesIterator.java      |  77 +++
 .../org/apache/lucene/search/TermQuery.java     |  21 +-
 .../java/org/apache/lucene/search/Weight.java   |  29 +
 .../java/org/apache/lucene/util/IOSupplier.java |  37 ++
 .../TestSoftDeletesRetentionMergePolicy.java    |  56 ++
 .../lucene/search/TestMatchesIterator.java      | 440 +++++++++++++
 .../search/join/ToParentBlockJoinQuery.java     |  23 +
 .../queries/function/FunctionScoreQuery.java    |   6 +
 .../org/apache/lucene/search/CoveringQuery.java |  22 +
 .../apache/lucene/search/AssertingMatches.java  |  43 ++
 .../lucene/search/AssertingMatchesIterator.java |  77 +++
 .../apache/lucene/search/AssertingWeight.java   |   8 +
 .../org/apache/lucene/search/CheckHits.java     |  55 +-
 .../org/apache/lucene/search/QueryUtils.java    |   2 +
 solr/CHANGES.txt                                |   2 +
 .../autoscaling/AutoAddReplicasPlanAction.java  |   4 +-
 .../solr/cloud/autoscaling/AutoScaling.java     |   3 +
 .../cloud/autoscaling/ComputePlanAction.java    |  21 +-
 .../cloud/autoscaling/IndexSizeTrigger.java     | 408 ++++++++++++
 .../solr/cloud/autoscaling/MetricTrigger.java   |   6 +-
 .../cloud/autoscaling/SearchRateTrigger.java    |   5 +-
 .../solr/cloud/autoscaling/TriggerEvent.java    |  16 +-
 .../org/apache/solr/cloud/CloudTestUtils.java   |  20 +-
 .../cloud/autoscaling/IndexSizeTriggerTest.java | 647 +++++++++++++++++++
 .../cloud/autoscaling/NodeAddedTriggerTest.java |   2 +-
 .../ScheduledMaintenanceTriggerTest.java        |   2 +-
 .../cloud/autoscaling/sim/SimCloudManager.java  | 103 ++-
 .../sim/SimClusterStateProvider.java            | 248 ++++++-
 .../autoscaling/sim/SimNodeStateProvider.java   |   4 +-
 .../autoscaling/sim/SimSolrCloudTestCase.java   |  60 +-
 .../cloud/autoscaling/sim/TestLargeCluster.java |  16 +-
 .../autoscaling/sim/TestTriggerIntegration.java |   2 +-
 .../src/solrcloud-autoscaling-triggers.adoc     |  77 +++
 .../client/solrj/cloud/autoscaling/Policy.java  |   3 +
 .../solrj/cloud/autoscaling/ReplicaInfo.java    |   4 +
 .../cloud/autoscaling/SplitShardSuggester.java  |  43 ++
 .../cloud/autoscaling/TriggerEventType.java     |   3 +-
 .../cloud/autoscaling/UnsupportedSuggester.java |  59 ++
 .../solr/common/params/CollectionParams.java    |   4 +-
 .../apache/solr/cloud/SolrCloudTestCase.java    |   2 +-
 62 files changed, 3194 insertions(+), 181 deletions(-)
----------------------------------------------------------------------



[18/46] lucene-solr:jira/solr-11833: SOLR-12028: BadApple and AwaitsFix annotations usage

Posted by ab...@apache.org.
SOLR-12028: BadApple and AwaitsFix annotations usage


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0014f3af
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0014f3af
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0014f3af

Branch: refs/heads/jira/solr-11833
Commit: 0014f3af88367961d8a7eb84a1a2333ecf66cb46
Parents: 2a2a0b6
Author: Erick Erickson <er...@apache.org>
Authored: Thu Apr 12 17:58:32 2018 -0700
Committer: Erick Erickson <er...@apache.org>
Committed: Thu Apr 12 17:58:32 2018 -0700

----------------------------------------------------------------------
 .../core/src/test/org/apache/lucene/index/TestIndexSorting.java  | 1 +
 solr/core/src/test/org/apache/solr/TestDistributedSearch.java    | 1 +
 solr/core/src/test/org/apache/solr/cloud/AddReplicaTest.java     | 1 +
 .../src/test/org/apache/solr/cloud/AliasIntegrationTest.java     | 1 +
 .../test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java | 1 +
 .../src/test/org/apache/solr/cloud/CreateRoutedAliasTest.java    | 4 ++++
 solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java  | 1 +
 solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java  | 1 +
 .../org/apache/solr/cloud/TestLeaderInitiatedRecoveryThread.java | 3 +++
 .../src/test/org/apache/solr/cloud/TestStressInPlaceUpdates.java | 1 +
 .../api/collections/TestCollectionsAPIViaSolrCloudCluster.java   | 1 +
 .../org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java | 1 +
 .../org/apache/solr/cloud/autoscaling/ScheduledTriggerTest.java  | 1 +
 .../solr/cloud/autoscaling/sim/TestTriggerIntegration.java       | 1 +
 .../apache/solr/handler/admin/AutoscalingHistoryHandlerTest.java | 3 +--
 .../apache/solr/common/cloud/TestCollectionStateWatchers.java    | 1 +
 16 files changed, 21 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0014f3af/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
index 3679d20..e29b9d0 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
@@ -2221,6 +2221,7 @@ public class TestIndexSorting extends LuceneTestCase {
   }
 
   // pits index time sorting against query time sorting
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void testRandom3() throws Exception {
     int numDocs = atLeast(1000);
     List<RandomDoc> docs = new ArrayList<>();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0014f3af/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/TestDistributedSearch.java b/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
index 9ca6e9e..e0d4951 100644
--- a/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
+++ b/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
@@ -108,6 +108,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
   }
   
   @Test
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void test() throws Exception {
     QueryResponse rsp = null;
     int backupStress = stress; // make a copy so we can restore

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0014f3af/solr/core/src/test/org/apache/solr/cloud/AddReplicaTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/AddReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/AddReplicaTest.java
index 07b59aa..55b1848 100644
--- a/solr/core/src/test/org/apache/solr/cloud/AddReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/AddReplicaTest.java
@@ -46,6 +46,7 @@ public class AddReplicaTest extends SolrCloudTestCase {
   }
 
   @Test
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void test() throws Exception {
     cluster.waitForAllNodes(5000);
     String collection = "addreplicatest_coll";

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0014f3af/solr/core/src/test/org/apache/solr/cloud/AliasIntegrationTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/AliasIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/AliasIntegrationTest.java
index 2944a25..9858ea7 100644
--- a/solr/core/src/test/org/apache/solr/cloud/AliasIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/AliasIntegrationTest.java
@@ -240,6 +240,7 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
   }
 
   @Test
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void testModifyPropertiesV1() throws Exception {
     // note we don't use TZ in this test, thus it's UTC
     final String aliasName = getTestName();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0014f3af/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java
index 96fe67b..c14e0bc 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyNothingIsSafeTest.java
@@ -112,6 +112,7 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
   }
 
   @Test
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void test() throws Exception {
     // None of the operations used here are particularly costly, so this should work.
     // Using this low timeout will also help us catch index stalling.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0014f3af/solr/core/src/test/org/apache/solr/cloud/CreateRoutedAliasTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/CreateRoutedAliasTest.java b/solr/core/src/test/org/apache/solr/cloud/CreateRoutedAliasTest.java
index 73be71e..92135d6 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CreateRoutedAliasTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CreateRoutedAliasTest.java
@@ -99,6 +99,7 @@ public class CreateRoutedAliasTest extends SolrCloudTestCase {
   // This is a fairly complete test where we set many options and see that it both affected the created
   //  collection and that the alias metadata was saved accordingly
   @Test
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void testV2() throws Exception {
     // note we don't use TZ in this test, thus it's UTC
     final String aliasName = getTestName();
@@ -180,6 +181,7 @@ public class CreateRoutedAliasTest extends SolrCloudTestCase {
   }
 
   @Test
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void testV1() throws Exception {
     final String aliasName = getTestName();
     final String baseUrl = cluster.getRandomJetty(random()).getBaseUrl().toString();
@@ -223,6 +225,7 @@ public class CreateRoutedAliasTest extends SolrCloudTestCase {
 
   // TZ should not affect the first collection name if absolute date given for start
   @Test
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void testTimezoneAbsoluteDate() throws Exception {
     final String aliasName = getTestName();
     try (SolrClient client = getCloudSolrClient(cluster)) {
@@ -241,6 +244,7 @@ public class CreateRoutedAliasTest extends SolrCloudTestCase {
   }
 
   @Test
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void testCollectionNamesMustBeAbsent() throws Exception {
     CollectionAdminRequest.createCollection("collection1meta", "_default", 2, 1).process(cluster.getSolrClient());
     CollectionAdminRequest.createCollection("collection2meta", "_default", 1, 1).process(cluster.getSolrClient());

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0014f3af/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
index 3208ebd..d9dbba0 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DeleteReplicaTest.java
@@ -290,6 +290,7 @@ public class DeleteReplicaTest extends SolrCloudTestCase {
   }
 
   @Test
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void deleteReplicaOnIndexing() throws Exception {
     final String collectionName = "deleteReplicaOnIndexing";
     CollectionAdminRequest.createCollection(collectionName, "conf", 1, 2)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0014f3af/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java
index 2cf8774..bdbb176 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudRecovery.java
@@ -77,6 +77,7 @@ public class TestCloudRecovery extends SolrCloudTestCase {
   }
 
   @Test
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void leaderRecoverFromLogOnStartupTest() throws Exception {
     AtomicInteger countReplayLog = new AtomicInteger(0);
     DirectUpdateHandler2.commitOnClose = false;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0014f3af/solr/core/src/test/org/apache/solr/cloud/TestLeaderInitiatedRecoveryThread.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestLeaderInitiatedRecoveryThread.java b/solr/core/src/test/org/apache/solr/cloud/TestLeaderInitiatedRecoveryThread.java
index f099fc6..e3ee8bb 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestLeaderInitiatedRecoveryThread.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestLeaderInitiatedRecoveryThread.java
@@ -30,6 +30,7 @@ import org.apache.solr.util.MockCoreContainer.MockCoreDescriptor;
 import org.apache.solr.util.TimeOut;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.data.Stat;
+import org.junit.Test;
 
 /**
  * Test for {@link LeaderInitiatedRecoveryThread}
@@ -43,6 +44,8 @@ public class TestLeaderInitiatedRecoveryThread extends AbstractFullDistribZkTest
     fixShardCount(2);
   }
 
+  @Test
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void testPublishDownState() throws Exception {
     waitForRecoveriesToFinish(true);
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0014f3af/solr/core/src/test/org/apache/solr/cloud/TestStressInPlaceUpdates.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestStressInPlaceUpdates.java b/solr/core/src/test/org/apache/solr/cloud/TestStressInPlaceUpdates.java
index e161095..8189779 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestStressInPlaceUpdates.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestStressInPlaceUpdates.java
@@ -93,6 +93,7 @@ public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
 
   @Test
   @ShardsFixed(num = 3)
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void stressTest() throws Exception {
     waitForRecoveriesToFinish(true);
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0014f3af/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionsAPIViaSolrCloudCluster.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionsAPIViaSolrCloudCluster.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionsAPIViaSolrCloudCluster.java
index b966d8e..9cd534d 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionsAPIViaSolrCloudCluster.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/TestCollectionsAPIViaSolrCloudCluster.java
@@ -172,6 +172,7 @@ public class TestCollectionsAPIViaSolrCloudCluster extends SolrCloudTestCase {
   }
 
   @Test
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void testCollectionCreateWithoutCoresThenDelete() throws Exception {
 
     final String collectionName = "testSolrCloudCollectionWithoutCores";

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0014f3af/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
index 4e48f59..b4fe16c 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
@@ -414,6 +414,7 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
   }
 
   @Test
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void testSelectedCollections() throws Exception {
     log.info("Found number of jetties: {}", cluster.getJettySolrRunners().size());
     AssertingTriggerAction.expectedNode = null;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0014f3af/solr/core/src/test/org/apache/solr/cloud/autoscaling/ScheduledTriggerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/ScheduledTriggerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/ScheduledTriggerTest.java
index dc37577..bf5a390 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/ScheduledTriggerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/ScheduledTriggerTest.java
@@ -56,6 +56,7 @@ public class ScheduledTriggerTest extends SolrCloudTestCase {
   }
 
   @Test
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void testTrigger() throws Exception {
     CoreContainer container = cluster.getJettySolrRunners().get(0).getCoreContainer();
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0014f3af/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestTriggerIntegration.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestTriggerIntegration.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestTriggerIntegration.java
index c898dbc..25f8e9e 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestTriggerIntegration.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/sim/TestTriggerIntegration.java
@@ -327,6 +327,7 @@ public class TestTriggerIntegration extends SimSolrCloudTestCase {
   }
 
   @Test
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void testNodeAddedTriggerRestoreState() throws Exception {
     // for this test we want to update the trigger so we must assert that the actions were created twice
     TestTriggerIntegration.actionInitCalled = new CountDownLatch(2);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0014f3af/solr/core/src/test/org/apache/solr/handler/admin/AutoscalingHistoryHandlerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/admin/AutoscalingHistoryHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/admin/AutoscalingHistoryHandlerTest.java
index 9597ac7..23959fd 100644
--- a/solr/core/src/test/org/apache/solr/handler/admin/AutoscalingHistoryHandlerTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/admin/AutoscalingHistoryHandlerTest.java
@@ -242,8 +242,7 @@ public class AutoscalingHistoryHandlerTest extends SolrCloudTestCase {
   }
 
   @Test
-  // commented out 26-Mar-2018
-  //@BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void testHistory() throws Exception {
     waitForState("Timed out wait for collection be active", COLL_NAME,
         clusterShape(1, 3));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0014f3af/solr/solrj/src/test/org/apache/solr/common/cloud/TestCollectionStateWatchers.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/common/cloud/TestCollectionStateWatchers.java b/solr/solrj/src/test/org/apache/solr/common/cloud/TestCollectionStateWatchers.java
index eb3ae7f..c28b6b2 100644
--- a/solr/solrj/src/test/org/apache/solr/common/cloud/TestCollectionStateWatchers.java
+++ b/solr/solrj/src/test/org/apache/solr/common/cloud/TestCollectionStateWatchers.java
@@ -267,6 +267,7 @@ public class TestCollectionStateWatchers extends SolrCloudTestCase {
   }
 
   @Test
+  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
   public void testDeletionsTriggerWatches() throws Exception {
     CollectionAdminRequest.createCollection("tobedeleted", "config", 1, 1)
         .process(cluster.getSolrClient());


[33/46] lucene-solr:jira/solr-11833: LUCENE-8251: Handle near-parallelness with envelope plane by a progressive adjoining point distance increment, up to 100 iterations. Then, give up and assume a crossing.

Posted by ab...@apache.org.
LUCENE-8251: Handle near-parallelness with envelope plane by a progressive adjoining point distance increment, up to 100 iterations.  Then, give up and assume a crossing.
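A minimal, self-contained sketch of the retry pattern described above (not the actual Geo3D code; the constant values, names, and the distance callback are illustrative stand-ins): the adjoining-point offset is widened by a fixed delta each iteration until both probe points sit clearly off the envelope plane, and after 100 failed attempts the caller falls back to assuming a crossing.

public class ProgressiveDeltaSketch {
  // Illustrative stand-ins for DELTA_DISTANCE, MAX_ITERATIONS and OFF_PLANE_AMOUNT in the diff below.
  static final double DELTA = 1e-12;            // roughly analogous to Vector.MINIMUM_RESOLUTION
  static final int MAX_ITERATIONS = 100;        // after this many widenings, give up
  static final double OFF_PLANE = DELTA * 0.1;  // "clearly off the envelope plane" threshold

  /**
   * Widens the probe offset until both adjoining points evaluate clearly off the plane.
   * Returns the usable offset, or -1.0 to signal "planes effectively parallel, assume a crossing".
   */
  static double findUsableOffset(java.util.function.DoubleUnaryOperator planeDistanceAtOffset) {
    double offset = 0.0;
    for (int i = 0; i < MAX_ITERATIONS; i++) {
      offset += DELTA;
      if (Math.abs(planeDistanceAtOffset.applyAsDouble(+offset)) > OFF_PLANE
          && Math.abs(planeDistanceAtOffset.applyAsDouble(-offset)) > OFF_PLANE) {
        return offset;
      }
    }
    return -1.0;
  }

  public static void main(String[] args) {
    // Example: the signed plane distance grows linearly with the offset, so the first widening suffices.
    double offset = findUsableOffset(d -> 0.5 * d);
    System.out.println(offset > 0.0 ? "usable offset: " + offset : "assume a crossing");
  }
}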


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d78c354b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d78c354b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d78c354b

Branch: refs/heads/jira/solr-11833
Commit: d78c354bef3dd451ab584c7fe71bb614696d7fd6
Parents: f88a553
Author: Karl Wright <Da...@gmail.com>
Authored: Fri Apr 13 12:05:42 2018 -0400
Committer: Karl Wright <Da...@gmail.com>
Committed: Fri Apr 13 12:05:42 2018 -0400

----------------------------------------------------------------------
 .../spatial3d/geom/GeoComplexPolygon.java       | 82 +++++++++++++-------
 .../lucene/spatial3d/geom/GeoPolygonTest.java   |  1 -
 .../spatial3d/geom/RandomGeoPolygonTest.java    |  1 -
 3 files changed, 54 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d78c354b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
index b6b6577..73ed92e 100644
--- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
+++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
@@ -977,15 +977,18 @@ class GeoComplexPolygon extends GeoBasePolygon {
         for (final GeoPoint intersection : intersections) {
           if (edge.startPlane.strictlyWithin(intersection) && edge.endPlane.strictlyWithin(intersection)) {
             // It's unique, so assess it
-            crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
+            crossings += edgeCrossesEnvelope(edge.plane, intersection, envelopePlane)?1:0;
           }
         }
       }
       return crossings;
     }
 
-    private boolean edgeCrossesEnvelope(final Plane edgePlane, final GeoPoint intersectionPoint) {
-      final GeoPoint[] adjoiningPoints = findAdjoiningPoints(edgePlane, intersectionPoint);
+    private boolean edgeCrossesEnvelope(final Plane edgePlane, final GeoPoint intersectionPoint, final Plane envelopePlane) {
+      final GeoPoint[] adjoiningPoints = findAdjoiningPoints(edgePlane, intersectionPoint, envelopePlane);
+      if (adjoiningPoints == null) {
+        return true;
+      }
       int withinCount = 0;
       for (final GeoPoint adjoining : adjoiningPoints) {
         if (plane.evaluateIsZero(adjoining) && bound.isWithin(adjoining)) {
@@ -1070,15 +1073,18 @@ class GeoComplexPolygon extends GeoBasePolygon {
         for (final GeoPoint intersection : intersections) {
           if (edge.startPlane.strictlyWithin(intersection) && edge.endPlane.strictlyWithin(intersection)) {
             // It's unique, so assess it
-            crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
+            crossings += edgeCrossesEnvelope(edge.plane, intersection, envelopePlane)?1:0;
           }
         }
       }
       return crossings;
     }
 
-    private boolean edgeCrossesEnvelope(final Plane edgePlane, final GeoPoint intersectionPoint) {
-      final GeoPoint[] adjoiningPoints = findAdjoiningPoints(edgePlane, intersectionPoint);
+    private boolean edgeCrossesEnvelope(final Plane edgePlane, final GeoPoint intersectionPoint, final Plane envelopePlane) {
+      final GeoPoint[] adjoiningPoints = findAdjoiningPoints(edgePlane, intersectionPoint, envelopePlane);
+      if (adjoiningPoints == null) {
+        return true;
+      }
       int withinCount = 0;
       for (final GeoPoint adjoining : adjoiningPoints) {
         if (plane.evaluateIsZero(adjoining) && bound1.isWithin(adjoining) && bound2.isWithin(adjoining)) {
@@ -1325,10 +1331,10 @@ class GeoComplexPolygon extends GeoBasePolygon {
           break;
         }
       }
-
-      System.out.println("");
-      System.out.println("Considering edge "+(edge.startPoint)+" -> "+(edge.endPoint));
       */
+      
+      //System.out.println("");
+      //System.out.println("Considering edge "+(edge.startPoint)+" -> "+(edge.endPoint));
 
       // Some edges are going to be given to us even when there's no real intersection, so do that as a sanity check, first.
       final GeoPoint[] travelCrossings = travelPlane.findIntersections(planetModel, edge.plane, checkPointCutoffPlane, checkPointOtherCutoffPlane, edge.startPlane, edge.endPlane);
@@ -1411,8 +1417,8 @@ class GeoComplexPolygon extends GeoBasePolygon {
               continue;
             }
             // It's unique, so assess it
-            //System.out.println("  Assessing travel envelope intersection point "+intersection+"...");
-            crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
+            //System.out.println("  Assessing travel envelope intersection point "+intersection+", travelPlane distance="+travelPlane.evaluate(intersection)+"...");
+            crossings += edgeCrossesEnvelope(edge.plane, intersection, travelEnvelopePlane)?1:0;
           }
         }
       }
@@ -1420,8 +1426,8 @@ class GeoComplexPolygon extends GeoBasePolygon {
         for (final GeoPoint intersection : testPointIntersections) {
           if (edge.startPlane.strictlyWithin(intersection) && edge.endPlane.strictlyWithin(intersection)) {
             // It's unique, so assess it
-            //System.out.println("  Assessing testpoint envelope intersection point "+intersection+"...");
-            crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
+            //System.out.println("  Assessing testpoint envelope intersection point "+intersection+", testPointPlane distance="+testPointPlane.evaluate(intersection)+"...");
+            crossings += edgeCrossesEnvelope(edge.plane, intersection, testPointEnvelopePlane)?1:0;
           }
         }
       }
@@ -1430,16 +1436,20 @@ class GeoComplexPolygon extends GeoBasePolygon {
 
     /** Return true if the edge crosses the envelope plane, given the envelope intersection point.
       */
-    private boolean edgeCrossesEnvelope(final Plane edgePlane, final GeoPoint intersectionPoint) {
-      final GeoPoint[] adjoiningPoints = findAdjoiningPoints(edgePlane, intersectionPoint);
+    private boolean edgeCrossesEnvelope(final Plane edgePlane, final GeoPoint intersectionPoint, final Plane envelopePlane) {
+      final GeoPoint[] adjoiningPoints = findAdjoiningPoints(edgePlane, intersectionPoint, envelopePlane);
+      if (adjoiningPoints == null) {
+        // Couldn't find good adjoining points, so just assume there is a crossing.
+        return true;
+      }
       int withinCount = 0;
       for (final GeoPoint adjoining : adjoiningPoints) {
         if ((travelPlane.evaluateIsZero(adjoining) && checkPointCutoffPlane.isWithin(adjoining) && checkPointOtherCutoffPlane.isWithin(adjoining)) ||
           (testPointPlane.evaluateIsZero(adjoining) && testPointCutoffPlane.isWithin(adjoining) && testPointOtherCutoffPlane.isWithin(adjoining))) {
-          //System.out.println("   Adjoining point "+adjoining+" (dist = "+intersectionPoint.linearDistance(adjoining)+") is within");
+          //System.out.println("   Adjoining point "+adjoining+" (intersection dist = "+intersectionPoint.linearDistance(adjoining)+") is within");
           withinCount++;
         } else {
-          //System.out.println("   Adjoining point "+adjoining+" (dist = "+intersectionPoint.linearDistance(adjoining)+") is not within");
+          //System.out.println("   Adjoining point "+adjoining+" (intersection dist = "+intersectionPoint.linearDistance(adjoining)+"; travelPlane dist="+travelPlane.evaluate(adjoining)+"; testPointPlane dist="+testPointPlane.evaluate(adjoining)+") is not within");
         }
       }
       return (withinCount & 1) != 0;
@@ -1450,23 +1460,39 @@ class GeoComplexPolygon extends GeoBasePolygon {
   /** This is the amount we go, roughly, in both directions, to find adjoining points to test.  If we go too far,
     * we might miss a transition, but if we go too little, we might not see it either due to numerical issues.
     */
-  private final static double DELTA_DISTANCE = Vector.MINIMUM_RESOLUTION;// * 0.5;
+  private final static double DELTA_DISTANCE = Vector.MINIMUM_RESOLUTION;
+  /** This is the maximum number of iterations.  If we get this high, effectively the planes are parallel, and we
+    * treat that as a crossing.
+    */
+  private final static int MAX_ITERATIONS = 100;
+  /** This is the amount off of the envelope plane that we count as "enough" for a valid crossing assessment. */
+  private final static double OFF_PLANE_AMOUNT = Vector.MINIMUM_RESOLUTION * 0.1;
   
   /** Given a point on the plane and the ellipsoid, this method looks for a pair of adjoining points on either side of the plane, which are
    * about MINIMUM_RESOLUTION away from the given point.  This only works for planes which go through the center of the world.
+   * Returns null if the planes are effectively parallel and reasonable adjoining points cannot be determined.
    */
-  private GeoPoint[] findAdjoiningPoints(final Plane plane, final GeoPoint pointOnPlane) {
+  private GeoPoint[] findAdjoiningPoints(final Plane plane, final GeoPoint pointOnPlane, final Plane envelopePlane) {
     // Compute a normalized perpendicular vector
     final Vector perpendicular = new Vector(plane, pointOnPlane);
-    // Compute two new points along this vector from the original
-    final GeoPoint pointA = planetModel.createSurfacePoint(pointOnPlane.x + perpendicular.x * DELTA_DISTANCE,
-      pointOnPlane.y + perpendicular.y * DELTA_DISTANCE,
-      pointOnPlane.z + perpendicular.z * DELTA_DISTANCE);
-    final GeoPoint pointB = planetModel.createSurfacePoint(pointOnPlane.x - perpendicular.x * DELTA_DISTANCE,
-      pointOnPlane.y - perpendicular.y * DELTA_DISTANCE,
-      pointOnPlane.z - perpendicular.z * DELTA_DISTANCE);
-    //System.out.println("Distance: "+computeSquaredDistance(rval[0], pointOnPlane)+" and "+computeSquaredDistance(rval[1], pointOnPlane));
-    return new GeoPoint[]{pointA, pointB};
+    double distanceFactor = 0.0;
+    for (int i = 0; i < MAX_ITERATIONS; i++) {
+      distanceFactor += DELTA_DISTANCE;
+      // Compute two new points along this vector from the original
+      final GeoPoint pointA = planetModel.createSurfacePoint(pointOnPlane.x + perpendicular.x * distanceFactor,
+        pointOnPlane.y + perpendicular.y * distanceFactor,
+        pointOnPlane.z + perpendicular.z * distanceFactor);
+      final GeoPoint pointB = planetModel.createSurfacePoint(pointOnPlane.x - perpendicular.x * distanceFactor,
+        pointOnPlane.y - perpendicular.y * distanceFactor,
+        pointOnPlane.z - perpendicular.z * distanceFactor);
+      if (Math.abs(envelopePlane.evaluate(pointA)) > OFF_PLANE_AMOUNT && Math.abs(envelopePlane.evaluate(pointB)) > OFF_PLANE_AMOUNT) {
+        //System.out.println("Distance: "+computeSquaredDistance(rval[0], pointOnPlane)+" and "+computeSquaredDistance(rval[1], pointOnPlane));
+        return new GeoPoint[]{pointA, pointB};
+      }
+      // Loop back around and use a bigger delta
+    }
+    // Had to abort, so return null.
+    return null;
   }
 
   private static double computeSquaredDistance(final GeoPoint checkPoint, final GeoPoint intersectionPoint) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d78c354b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
index 86f5694..524475a 100755
--- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
+++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
@@ -1570,7 +1570,6 @@ shape:
   }
   
   @Test
-  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8251")
   public void testLUCENE8251() {
     //POLYGON((135.63207358036593 -51.43541696593334,113.00782694696038 -58.984559858566556,0.0 -3.68E-321,-66.33598777585381 -7.382056816201731,135.63207358036593 -51.43541696593334))
     final List<GeoPoint> points = new ArrayList<>();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d78c354b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
index b6364e0..a181d17 100644
--- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
+++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
@@ -92,7 +92,6 @@ public class RandomGeoPolygonTest extends RandomGeo3dShapeGenerator {
    * biased doubles.
    */
   @Test
-  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8251")
   @Repeat(iterations = 10)
   public void testComparePolygons() {
     final PlanetModel planetModel = randomPlanetModel();


[06/46] lucene-solr:jira/solr-11833: LUCENE-8245: Re-solve the 'intersection outside the world' case.

Posted by ab...@apache.org.
LUCENE-8245: Re-solve the 'intersection outside the world' case.
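For context, a hedged sketch of the bounds test being relaxed in the diff below (simplified signature; the plane offset D and min/max values stand in for the Plane and PlanetModel fields): an envelope plane is now discarded only when its offset places it strictly outside the world's extent, rather than within a MINIMUM_RESOLUTION tolerance of it.

public class PlaneBoundsSketch {
  /** True if a plane with offset D lies strictly outside the world's extent along one axis. */
  static boolean outsideWorld(double planeD, double minValue, double maxValue) {
    // The fix replaces ">= Vector.MINIMUM_RESOLUTION" with a strict "> 0.0" comparison,
    // so planes that merely graze the world's surface are kept rather than discarded.
    return planeD - maxValue > 0.0 || minValue - planeD > 0.0;
  }

  public static void main(String[] args) {
    System.out.println(outsideWorld(1.0000001, -1.0, 1.0)); // true: beyond the maximum extent
    System.out.println(outsideWorld(1.0, -1.0, 1.0));       // false: exactly at the surface is kept
  }
}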


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0b1e8ef7
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0b1e8ef7
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0b1e8ef7

Branch: refs/heads/jira/solr-11833
Commit: 0b1e8ef72e2e9ae75a1929a00b7137dfb1a75b12
Parents: 5bd7b03
Author: Karl Wright <Da...@gmail.com>
Authored: Thu Apr 12 06:39:32 2018 -0400
Committer: Karl Wright <Da...@gmail.com>
Committed: Thu Apr 12 06:39:32 2018 -0400

----------------------------------------------------------------------
 .../spatial3d/geom/GeoComplexPolygon.java       | 38 ++++++++++----------
 .../lucene/spatial3d/geom/GeoPolygonTest.java   |  2 +-
 2 files changed, 21 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0b1e8ef7/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
index 81443a5..487a771 100644
--- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
+++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/GeoComplexPolygon.java
@@ -81,37 +81,37 @@ class GeoComplexPolygon extends GeoBasePolygon {
     this.testPointFixedZPlane = new Plane(0.0, 0.0, 1.0, -testPoint.z);
     
     Plane fixedYAbovePlane = new Plane(testPointFixedYPlane, true);
-    if (fixedYAbovePlane.D - planetModel.getMaximumYValue() >= Vector.MINIMUM_RESOLUTION || planetModel.getMinimumYValue() - fixedYAbovePlane.D >= Vector.MINIMUM_RESOLUTION) {
+    if (fixedYAbovePlane.D - planetModel.getMaximumYValue() > 0.0 || planetModel.getMinimumYValue() - fixedYAbovePlane.D > 0.0) {
         fixedYAbovePlane = null;
     }
     this.testPointFixedYAbovePlane = fixedYAbovePlane;
     
     Plane fixedYBelowPlane = new Plane(testPointFixedYPlane, false);
-    if (fixedYBelowPlane.D - planetModel.getMaximumYValue() >= Vector.MINIMUM_RESOLUTION || planetModel.getMinimumYValue() - fixedYBelowPlane.D >= Vector.MINIMUM_RESOLUTION) {
+    if (fixedYBelowPlane.D - planetModel.getMaximumYValue() > 0.0 ||  planetModel.getMinimumYValue() - fixedYBelowPlane.D > 0.0) {
         fixedYBelowPlane = null;
     }
     this.testPointFixedYBelowPlane = fixedYBelowPlane;
     
     Plane fixedXAbovePlane = new Plane(testPointFixedXPlane, true);
-    if (fixedXAbovePlane.D - planetModel.getMaximumXValue() >= Vector.MINIMUM_RESOLUTION || planetModel.getMinimumXValue() - fixedXAbovePlane.D >= Vector.MINIMUM_RESOLUTION) {
+    if (fixedXAbovePlane.D - planetModel.getMaximumXValue() > 0.0 || planetModel.getMinimumXValue() - fixedXAbovePlane.D > 0.0) {
         fixedXAbovePlane = null;
     }
     this.testPointFixedXAbovePlane = fixedXAbovePlane;
     
     Plane fixedXBelowPlane = new Plane(testPointFixedXPlane, false);
-    if (fixedXBelowPlane.D - planetModel.getMaximumXValue() >= Vector.MINIMUM_RESOLUTION || planetModel.getMinimumXValue() - fixedXBelowPlane.D >= Vector.MINIMUM_RESOLUTION) {
+    if (fixedXBelowPlane.D - planetModel.getMaximumXValue() > 0.0 || planetModel.getMinimumXValue() - fixedXBelowPlane.D > 0.0) {
         fixedXBelowPlane = null;
     }
     this.testPointFixedXBelowPlane = fixedXBelowPlane;
     
     Plane fixedZAbovePlane = new Plane(testPointFixedZPlane, true);
-    if (fixedZAbovePlane.D - planetModel.getMaximumZValue() >= Vector.MINIMUM_RESOLUTION || planetModel.getMinimumZValue() - fixedZAbovePlane.D >= Vector.MINIMUM_RESOLUTION) {
+    if (fixedZAbovePlane.D - planetModel.getMaximumZValue() > 0.0 ||planetModel.getMinimumZValue() - fixedZAbovePlane.D > 0.0) {
         fixedZAbovePlane = null;
     }
     this.testPointFixedZAbovePlane = fixedZAbovePlane;
     
     Plane fixedZBelowPlane = new Plane(testPointFixedZPlane, false);
-    if (fixedZBelowPlane.D - planetModel.getMaximumZValue() >= Vector.MINIMUM_RESOLUTION || planetModel.getMinimumZValue() - fixedZBelowPlane.D >= Vector.MINIMUM_RESOLUTION) {
+    if (fixedZBelowPlane.D - planetModel.getMaximumZValue() > 0.0 || planetModel.getMinimumZValue() - fixedZBelowPlane.D > 0.0) {
         fixedZBelowPlane = null;
     }
     this.testPointFixedZBelowPlane = fixedZBelowPlane;
@@ -234,32 +234,32 @@ class GeoComplexPolygon extends GeoBasePolygon {
       final Plane travelPlaneFixedZ = new Plane(0.0, 0.0, 1.0, -z);
 
       Plane fixedYAbovePlane = new Plane(travelPlaneFixedY, true);
-      if (fixedYAbovePlane.D - planetModel.getMaximumYValue() >= Vector.MINIMUM_RESOLUTION || planetModel.getMinimumYValue() - fixedYAbovePlane.D >= Vector.MINIMUM_RESOLUTION) {
+      if (fixedYAbovePlane.D - planetModel.getMaximumYValue() > 0.0 || planetModel.getMinimumYValue() - fixedYAbovePlane.D > 0.0) {
           fixedYAbovePlane = null;
       }
       
       Plane fixedYBelowPlane = new Plane(travelPlaneFixedY, false);
-      if (fixedYBelowPlane.D - planetModel.getMaximumYValue() >= Vector.MINIMUM_RESOLUTION || planetModel.getMinimumYValue() - fixedYBelowPlane.D >= Vector.MINIMUM_RESOLUTION) {
+      if (fixedYBelowPlane.D - planetModel.getMaximumYValue() > 0.0 || planetModel.getMinimumYValue() - fixedYBelowPlane.D > 0.0) {
           fixedYBelowPlane = null;
       }
       
       Plane fixedXAbovePlane = new Plane(travelPlaneFixedX, true);
-      if (fixedXAbovePlane.D - planetModel.getMaximumXValue() >= Vector.MINIMUM_RESOLUTION || planetModel.getMinimumXValue() - fixedXAbovePlane.D >= Vector.MINIMUM_RESOLUTION) {
+      if (fixedXAbovePlane.D - planetModel.getMaximumXValue() > 0.0 || planetModel.getMinimumXValue() - fixedXAbovePlane.D > 0.0) {
           fixedXAbovePlane = null;
       }
       
       Plane fixedXBelowPlane = new Plane(travelPlaneFixedX, false);
-      if (fixedXBelowPlane.D - planetModel.getMaximumXValue() >= Vector.MINIMUM_RESOLUTION || planetModel.getMinimumXValue() - fixedXBelowPlane.D >= Vector.MINIMUM_RESOLUTION) {
+      if (fixedXBelowPlane.D - planetModel.getMaximumXValue() > 0.0 || planetModel.getMinimumXValue() - fixedXBelowPlane.D > 0.0) {
           fixedXBelowPlane = null;
       }
       
       Plane fixedZAbovePlane = new Plane(travelPlaneFixedZ, true);
-      if (fixedZAbovePlane.D - planetModel.getMaximumZValue() >= Vector.MINIMUM_RESOLUTION || planetModel.getMinimumZValue() - fixedZAbovePlane.D >= Vector.MINIMUM_RESOLUTION) {
+      if (fixedZAbovePlane.D - planetModel.getMaximumZValue() > 0.0 || planetModel.getMinimumZValue() - fixedZAbovePlane.D > 0.0) {
           fixedZAbovePlane = null;
       }
       
       Plane fixedZBelowPlane = new Plane(travelPlaneFixedZ, false);
-      if (fixedZBelowPlane.D - planetModel.getMaximumZValue() >= Vector.MINIMUM_RESOLUTION || planetModel.getMinimumZValue() - fixedZBelowPlane.D >= Vector.MINIMUM_RESOLUTION) {
+      if (fixedZBelowPlane.D - planetModel.getMaximumZValue() > 0.0 || planetModel.getMinimumZValue() - fixedZBelowPlane.D > 0.0) {
           fixedZBelowPlane = null;
       }
 
@@ -1208,12 +1208,14 @@ class GeoComplexPolygon extends GeoBasePolygon {
           travelOutsidePlane = travelAbovePlane;
           testPointOutsidePlane = testPointAbovePlane;
           insideInsidePoints = belowBelow;
-        } else {
+        } else if (belowAbove.length > 0) {
           travelInsidePlane = travelBelowPlane;
           testPointInsidePlane = testPointAbovePlane;
           travelOutsidePlane = travelAbovePlane;
           testPointOutsidePlane = testPointBelowPlane;
           insideInsidePoints = belowAbove;
+        } else {
+          throw new IllegalStateException("Can't find traversal intersection among: "+travelAbovePlane+", "+testPointAbovePlane+", "+travelBelowPlane+", "+testPointBelowPlane);
         }
         
         // Get the inside-inside intersection point
@@ -1297,7 +1299,7 @@ class GeoComplexPolygon extends GeoBasePolygon {
       // We've never seen this edge before.  Evaluate it in the context of inner and outer planes.
       computeInsideOutside();
 
-      /* 
+      /*
       System.out.println("\nThe following edges should intersect the travel/testpoint planes:");
       Edge thisEdge = edge;
       while (true) {
@@ -1314,8 +1316,7 @@ class GeoComplexPolygon extends GeoBasePolygon {
           break;
         }
       }
-      */
-      /*
+
       System.out.println("");
       System.out.println("Considering edge "+(edge.startPoint)+" -> "+(edge.endPoint));
       */
@@ -1390,6 +1391,7 @@ class GeoComplexPolygon extends GeoBasePolygon {
           if (testPointIntersections != null) {
             for (final GeoPoint otherIntersection : testPointIntersections) {
               if (intersection.isNumericallyIdentical(otherIntersection)) {
+                //System.out.println("  Points "+intersection+" and "+otherIntersection+" are duplicates");
                 notDup = false;
                 break;
               }
@@ -1399,14 +1401,14 @@ class GeoComplexPolygon extends GeoBasePolygon {
             continue;
           }
           // It's unique, so assess it
-          //System.out.println("  Assessing travel intersection point "+intersection+"...");
+          //System.out.println("  Assessing travel envelope intersection point "+intersection+"...");
           crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
         }
       }
       if (testPointIntersections != null) {
         for (final GeoPoint intersection : testPointIntersections) {
           // It's unique, so assess it
-          //System.out.println("  Assessing testpoint intersection point "+intersection+"...");
+          //System.out.println("  Assessing testpoint envelope intersection point "+intersection+"...");
           crossings += edgeCrossesEnvelope(edge.plane, intersection)?1:0;
         }
       }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0b1e8ef7/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
index 1311f4e..cd65018 100755
--- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
+++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/GeoPolygonTest.java
@@ -1428,7 +1428,7 @@ shape:
   }
 
   @Test
-  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8245")
+  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8245")
   public void testComplexPolygonPlaneOutsideWorld() {
     List<GeoPoint> points = new ArrayList<>();
     points.add(new GeoPoint(PlanetModel.SPHERE, -0.5, -0.5));


[31/46] lucene-solr:jira/solr-11833: Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr

Posted by ab...@apache.org.
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/f88a553a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/f88a553a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/f88a553a

Branch: refs/heads/jira/solr-11833
Commit: f88a553a91207d911c99d17cb3a23d9eeb6b3665
Parents: 79350bd 0544486
Author: Karl Wright <Da...@gmail.com>
Authored: Fri Apr 13 09:40:11 2018 -0400
Committer: Karl Wright <Da...@gmail.com>
Committed: Fri Apr 13 09:40:11 2018 -0400

----------------------------------------------------------------------
 dev-tools/idea/.idea/modules.xml                |   1 +
 dev-tools/idea/lucene/analysis/nori/nori.iml    |  22 +
 .../maven/lucene/analysis/nori/pom.xml.template |  75 ++
 .../maven/lucene/analysis/pom.xml.template      |   1 +
 lucene/CHANGES.txt                              |   4 +
 lucene/NOTICE.txt                               |  12 +
 lucene/analysis/README.txt                      |   5 +
 lucene/analysis/build.xml                       |   7 +-
 lucene/analysis/nori/build.xml                  | 135 +++
 lucene/analysis/nori/ivy.xml                    |  33 +
 .../lucene/analysis/ko/DecompoundToken.java     |  69 ++
 .../lucene/analysis/ko/DictionaryToken.java     | 100 ++
 .../lucene/analysis/ko/GraphvizFormatter.java   | 180 ++++
 .../lucene/analysis/ko/KoreanAnalyzer.java      |  78 ++
 .../ko/KoreanPartOfSpeechStopFilter.java        |  85 ++
 .../ko/KoreanPartOfSpeechStopFilterFactory.java |  51 +
 .../analysis/ko/KoreanReadingFormFilter.java    |  51 +
 .../ko/KoreanReadingFormFilterFactory.java      |  42 +
 .../lucene/analysis/ko/KoreanTokenizer.java     | 957 +++++++++++++++++++
 .../analysis/ko/KoreanTokenizerFactory.java     |  89 ++
 .../java/org/apache/lucene/analysis/ko/POS.java | 304 ++++++
 .../org/apache/lucene/analysis/ko/Token.java    | 125 +++
 .../analysis/ko/dict/BinaryDictionary.java      | 239 +++++
 .../analysis/ko/dict/CharacterDefinition.java   | 136 +++
 .../analysis/ko/dict/ConnectionCosts.java       |  96 ++
 .../lucene/analysis/ko/dict/Dictionary.java     |  83 ++
 .../analysis/ko/dict/TokenInfoDictionary.java   |  77 ++
 .../lucene/analysis/ko/dict/TokenInfoFST.java   |  85 ++
 .../analysis/ko/dict/UnknownDictionary.java     |  61 ++
 .../lucene/analysis/ko/dict/UserDictionary.java | 235 +++++
 .../lucene/analysis/ko/dict/package-info.java   |  21 +
 .../apache/lucene/analysis/ko/package-info.java |  21 +
 .../tokenattributes/PartOfSpeechAttribute.java  |  54 ++
 .../PartOfSpeechAttributeImpl.java              |  92 ++
 .../ko/tokenattributes/ReadingAttribute.java    |  38 +
 .../tokenattributes/ReadingAttributeImpl.java   |  55 ++
 .../ko/tokenattributes/package-info.java        |  21 +
 .../apache/lucene/analysis/ko/util/CSVUtil.java |  95 ++
 .../lucene/analysis/ko/util/package-info.java   |  21 +
 lucene/analysis/nori/src/java/overview.html     |  34 +
 ...ache.lucene.analysis.util.TokenFilterFactory |  16 +
 ...apache.lucene.analysis.util.TokenizerFactory |  16 +
 .../analysis/ko/dict/CharacterDefinition.dat    | Bin 0 -> 65564 bytes
 .../lucene/analysis/ko/dict/ConnectionCosts.dat | Bin 0 -> 11178837 bytes
 .../ko/dict/TokenInfoDictionary$buffer.dat      | Bin 0 -> 7245625 bytes
 .../ko/dict/TokenInfoDictionary$fst.dat         | Bin 0 -> 5640925 bytes
 .../ko/dict/TokenInfoDictionary$posDict.dat     | Bin 0 -> 2712 bytes
 .../ko/dict/TokenInfoDictionary$targetMap.dat   | Bin 0 -> 811783 bytes
 .../ko/dict/UnknownDictionary$buffer.dat        | Bin 0 -> 101 bytes
 .../ko/dict/UnknownDictionary$posDict.dat       | Bin 0 -> 1823 bytes
 .../ko/dict/UnknownDictionary$targetMap.dat     | Bin 0 -> 36 bytes
 .../analysis/ko/StringMockResourceLoader.java   |  58 ++
 .../lucene/analysis/ko/TestKoreanAnalyzer.java  | 109 +++
 ...TestKoreanPartOfSpeechStopFilterFactory.java |  59 ++
 .../ko/TestKoreanReadingFormFilter.java         |  75 ++
 .../ko/TestKoreanReadingFormFilterFactory.java  |  51 +
 .../lucene/analysis/ko/TestKoreanTokenizer.java | 355 +++++++
 .../analysis/ko/TestKoreanTokenizerFactory.java | 113 +++
 .../ko/dict/TestTokenInfoDictionary.java        | 113 +++
 .../analysis/ko/dict/UserDictionaryTest.java    |  62 ++
 .../org/apache/lucene/analysis/ko/userdict.txt  |   5 +
 .../ko/util/BinaryDictionaryWriter.java         | 282 ++++++
 .../ko/util/CharacterDefinitionWriter.java      |  94 ++
 .../ko/util/ConnectionCostsBuilder.java         |  67 ++
 .../analysis/ko/util/ConnectionCostsWriter.java |  75 ++
 .../analysis/ko/util/DictionaryBuilder.java     |  67 ++
 .../ko/util/TokenInfoDictionaryBuilder.java     | 150 +++
 .../ko/util/TokenInfoDictionaryWriter.java      |  49 +
 .../ko/util/UnknownDictionaryBuilder.java       | 134 +++
 .../ko/util/UnknownDictionaryWriter.java        |  65 ++
 .../analysis/ko/dict/UnknownDictionaryTest.java |  74 ++
 .../org/apache/lucene/index/PendingDeletes.java |   4 +-
 .../apache/lucene/index/PendingSoftDeletes.java |   8 +-
 .../apache/lucene/index/ReadersAndUpdates.java  |   2 +-
 lucene/ivy-versions.properties                  |   1 +
 lucene/module-build.xml                         |  22 +
 .../cloud/autoscaling/IndexSizeTriggerTest.java |   2 +
 .../sim/SimClusterStateProvider.java            |   9 +-
 78 files changed, 6017 insertions(+), 10 deletions(-)
----------------------------------------------------------------------



[44/46] lucene-solr:jira/solr-11833: SOLR-12181: Fix test failures.

Posted by ab...@apache.org.
SOLR-12181: Fix test failures.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/1e759bc5
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/1e759bc5
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/1e759bc5

Branch: refs/heads/jira/solr-11833
Commit: 1e759bc50580f643af67dc7f25ef5cd71bd29825
Parents: 19fa91d
Author: Andrzej Bialecki <ab...@apache.org>
Authored: Mon Apr 16 13:15:33 2018 +0200
Committer: Andrzej Bialecki <ab...@apache.org>
Committed: Mon Apr 16 13:16:05 2018 +0200

----------------------------------------------------------------------
 .../cloud/autoscaling/IndexSizeTriggerTest.java | 26 +++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1e759bc5/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
index 3bf7021..265a6c6 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/IndexSizeTriggerTest.java
@@ -20,6 +20,7 @@ package org.apache.solr.cloud.autoscaling;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -29,7 +30,6 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
 
-import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrRequest;
@@ -39,12 +39,14 @@ import org.apache.solr.client.solrj.cloud.autoscaling.Suggester;
 import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.cloud.CloudTestUtils;
 import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.cloud.autoscaling.sim.SimCloudManager;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.params.CollectionParams;
+import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.Pair;
 import org.apache.solr.common.util.TimeSource;
@@ -65,7 +67,6 @@ import static org.apache.solr.common.cloud.ZkStateReader.SOLR_AUTOSCALING_CONF_P
  *
  */
 @LogLevel("org.apache.solr.cloud.autoscaling=DEBUG")
-@LuceneTestCase.AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12181")
 public class IndexSizeTriggerTest extends SolrCloudTestCase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
@@ -457,6 +458,25 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
     }
     solrClient.commit(collectionName);
 
+    // check the actual size of shard to set the threshold
+    QueryResponse rsp = solrClient.query(params(CommonParams.QT, "/admin/metrics", "group", "core"));
+    NamedList<Object> nl = rsp.getResponse();
+    nl = (NamedList<Object>)nl.get("metrics");
+    int maxSize = 0;
+    for (Iterator<Map.Entry<String, Object>> it = nl.iterator(); it.hasNext(); ) {
+      Map.Entry<String, Object> e = it.next();
+      NamedList<Object> metrics = (NamedList<Object>)e.getValue();
+      Object o = metrics.get("INDEX.sizeInBytes");
+      assertNotNull("INDEX.sizeInBytes missing: " + metrics, o);
+      assertTrue("not a number", o instanceof Number);
+      if (maxSize < ((Number)o).intValue()) {
+        maxSize = ((Number)o).intValue();
+      }
+    }
+    assertTrue("maxSize should be non-zero", maxSize > 0);
+
+    int aboveBytes = maxSize * 2 / 3;
+
     long waitForSeconds = 3 + random().nextInt(5);
 
     // the trigger is initially disabled so that we have time to add listeners
@@ -471,7 +491,7 @@ public class IndexSizeTriggerTest extends SolrCloudTestCase {
         // hit this limit when deleting
         "'belowDocs' : 100," +
         // hit this limit when indexing
-        "'aboveBytes' : 150000," +
+        "'aboveBytes' : " + aboveBytes + "," +
         // don't hit this limit when deleting
         "'belowBytes' : 10," +
         "'enabled' : false," +


[21/46] lucene-solr:jira/solr-11833: LUCENE-8251: Annotate occasionally failing test with AwaitsFix

Posted by ab...@apache.org.
LUCENE-8251: Annotate occasionally failing test with AwaitsFix


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e8f1649a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e8f1649a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e8f1649a

Branch: refs/heads/jira/solr-11833
Commit: e8f1649ab4f5f79cd1dc6b7b4f26c5f6ec133bc5
Parents: 71936db
Author: Karl Wright <Da...@gmail.com>
Authored: Thu Apr 12 23:46:40 2018 -0400
Committer: Karl Wright <Da...@gmail.com>
Committed: Thu Apr 12 23:46:40 2018 -0400

----------------------------------------------------------------------
 .../org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e8f1649a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
----------------------------------------------------------------------
diff --git a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
index ed2261b..b6364e0 100644
--- a/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
+++ b/lucene/spatial3d/src/test/org/apache/lucene/spatial3d/geom/RandomGeoPolygonTest.java
@@ -92,7 +92,7 @@ public class RandomGeoPolygonTest extends RandomGeo3dShapeGenerator {
    * biased doubles.
    */
   @Test
-  //@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8245")
+  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/LUCENE-8251")
   @Repeat(iterations = 10)
   public void testComparePolygons() {
     final PlanetModel planetModel = randomPlanetModel();


[10/46] lucene-solr:jira/solr-11833: LUCENE-8233: Harden PendingDeletes modification assertions

Posted by ab...@apache.org.
LUCENE-8233: Harden PendingDeletes modification assertions

This change adds a missing call to PendingDeletes#onNewReader and
hardens the assertion on when a PendingDeletes instance can actually be
modified, i.e. receive deletes and updates. PendingDeletes is now also
initialized when no reader is provided but the SegmentCommitInfo shows
that there are no deletes.
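A hedged, simplified sketch of the initialization rule described above (illustrative class and field names, not the actual Lucene index internals): a segment that provably has no on-disk deletions starts out initialized, and the hardened assertion rejects deletes that arrive before the live-docs bitset is ready.

public class PendingDeletesSketch {
  private final boolean liveDocsInitialized;

  PendingDeletesSketch(boolean segmentHasDeletions) {
    // A segment with no on-disk deletions may start out initialized: a later delete
    // can begin from an all-live bitset without a reader ever having been opened.
    this.liveDocsInitialized = segmentHasDeletions == false;
  }

  void delete(int docID) {
    // Hardened assertion: receiving a delete without an initialized live-docs bitset is a bug.
    assert liveDocsInitialized : "can't delete if liveDocs are not initialized";
    // ... flip the bit for docID in a copy-on-write bitset (omitted) ...
  }

  public static void main(String[] args) {
    new PendingDeletesSketch(false).delete(3); // segment with no deletions: allowed
  }
}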


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/9cb07c73
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/9cb07c73
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/9cb07c73

Branch: refs/heads/jira/solr-11833
Commit: 9cb07c735b2095184041280bf03b3df9bfa010a1
Parents: 017f59b
Author: Simon Willnauer <si...@apache.org>
Authored: Thu Apr 12 11:35:15 2018 +0200
Committer: Simon Willnauer <si...@apache.org>
Committed: Thu Apr 12 12:44:34 2018 +0200

----------------------------------------------------------------------
 .../org/apache/lucene/index/PendingDeletes.java |  8 ++++++-
 .../apache/lucene/index/PendingSoftDeletes.java |  3 ++-
 .../apache/lucene/index/ReadersAndUpdates.java  |  1 +
 .../apache/lucene/index/TestPendingDeletes.java |  8 +++----
 .../lucene/index/TestPendingSoftDeletes.java    | 23 +++++++++++++++-----
 5 files changed, 31 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cb07c73/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
index bce704c..fca42b4 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
@@ -52,7 +52,10 @@ class PendingDeletes {
   }
 
   PendingDeletes(SegmentCommitInfo info) {
-    this(info, null, false);
+    this(info, null, info.hasDeletions() == false);
+    // if we don't have deletions we can mark it as initialized since we might receive deletes on a segment
+    // without having a reader opened on it ie. after a merge when we apply the deletes that IW received while merging.
+    // For segments that were published we enforce a reader in the BufferedUpdatesStream.SegmentState ctor
   }
 
   private PendingDeletes(SegmentCommitInfo info, Bits liveDocs, boolean liveDocsInitialized) {
@@ -65,6 +68,9 @@ class PendingDeletes {
 
 
   protected MutableBits getMutableBits() throws IOException {
+    // if we pull mutable bits but we haven't been initialized something is completely off.
+    // this means we receive deletes without having the bitset that is on-disk ready to be cloned
+    assert liveDocsInitialized : "can't delete if liveDocs are not initialized";
     if (liveDocsShared) {
       // Copy on write: this means we've cloned a
       // SegmentReader sharing the current liveDocs

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cb07c73/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
index b73ac83..0d7852b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
@@ -67,7 +67,7 @@ final class PendingSoftDeletes extends PendingDeletes {
   void onNewReader(SegmentReader reader, SegmentCommitInfo info) throws IOException {
     super.onNewReader(reader, info);
     hardDeletes.onNewReader(reader, info);
-    if (dvGeneration != info.getDocValuesGen()) { // only re-calculate this if we haven't seen this generation
+    if (dvGeneration < info.getDocValuesGen()) { // only re-calculate this if we haven't seen this generation
       final DocIdSetIterator iterator = DocValuesFieldExistsQuery.getDocValuesDocIdSetIterator(field, reader);
       if (iterator == null) { // nothing is deleted we don't have a soft deletes field in this segment
         this.pendingDeleteCount = 0;
@@ -120,6 +120,7 @@ final class PendingSoftDeletes extends PendingDeletes {
   void onDocValuesUpdate(FieldInfo info, List<DocValuesFieldUpdates> updatesToApply) throws IOException {
     if (field.equals(info.name)) {
       assert dvGeneration < info.getDocValuesGen() : "we have seen this generation update already: " + dvGeneration + " vs. " + info.getDocValuesGen();
+      assert dvGeneration != -2 : "docValues generation is still uninitialized";
       DocValuesFieldUpdates.Iterator[] subs = new DocValuesFieldUpdates.Iterator[updatesToApply.size()];
       for(int i=0; i<subs.length; i++) {
         subs[i] = updatesToApply.get(i).iterator();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cb07c73/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
index 4851d45..6dc8864 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
@@ -619,6 +619,7 @@ final class ReadersAndUpdates {
       final SegmentReader reader;
       if (this.reader == null) {
         reader = new SegmentReader(info, indexCreatedVersionMajor, IOContext.READONCE);
+        pendingDeletes.onNewReader(reader, info);
       } else {
         reader = this.reader;
       }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cb07c73/lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java b/lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java
index e150e06..bbe309a 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java
@@ -40,7 +40,7 @@ public class TestPendingDeletes extends LuceneTestCase {
     RAMDirectory dir = new RAMDirectory();
     SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 10, false, Codec.getDefault(),
         Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
-    SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, 0, 0);
+    SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, -1, -1, -1);
     PendingDeletes deletes = newPendingDeletes(commitInfo);
     assertNull(deletes.getLiveDocs());
     int docToDelete = TestUtil.nextInt(random(), 0, 7);
@@ -76,7 +76,7 @@ public class TestPendingDeletes extends LuceneTestCase {
     RAMDirectory dir = new RAMDirectory();
     SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 6, false, Codec.getDefault(),
         Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
-    SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, 0, 0);
+    SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, -1, -1, -1);
     PendingDeletes deletes = newPendingDeletes(commitInfo);
     assertFalse(deletes.writeLiveDocs(dir));
     assertEquals(0, dir.listAll().length);
@@ -86,7 +86,7 @@ public class TestPendingDeletes extends LuceneTestCase {
       deletes.liveDocsShared();
       deletes.delete(2);
     }
-    assertEquals(0, commitInfo.getDelGen());
+    assertEquals(-1, commitInfo.getDelGen());
     assertEquals(0, commitInfo.getDelCount());
 
     assertEquals(secondDocDeletes ? 2 : 1, deletes.numPendingDeletes());
@@ -133,7 +133,7 @@ public class TestPendingDeletes extends LuceneTestCase {
     RAMDirectory dir = new RAMDirectory();
     SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 3, false, Codec.getDefault(),
         Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
-    SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, 0, 0);
+    SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, -1, -1, -1);
     PendingDeletes deletes = newPendingDeletes(commitInfo);
     for (int i = 0; i < 3; i++) {
       assertTrue(deletes.delete(i));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cb07c73/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java b/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java
index c428a4b..255ff9e 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java
@@ -45,7 +45,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
 
   public void testDeleteSoft() throws IOException {
     Directory dir = newDirectory();
-    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); // no soft delete field hier
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); // no soft delete field here
     Document doc = new Document();
     doc.add(new StringField("id", "1", Field.Store.YES));
     writer.softUpdateDocument(new Term("id", "1"), doc,
@@ -97,8 +97,20 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
     RAMDirectory dir = new RAMDirectory();
     SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 10, false, Codec.getDefault(),
         Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
-    SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, 0, 0);
+    SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, -1, -1, -1);
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig());
+    for (int i = 0; i < si.maxDoc(); i++) {
+      writer.addDocument(new Document());
+    }
+    writer.forceMerge(1);
+    writer.commit();
+    DirectoryReader reader = writer.getReader();
+    assertEquals(1, reader.leaves().size());
+    SegmentReader segmentReader = (SegmentReader) reader.leaves().get(0).reader();
     PendingSoftDeletes deletes = newPendingDeletes(commitInfo);
+    deletes.onNewReader(segmentReader, commitInfo);
+    reader.close();
+    writer.close();
     FieldInfo fieldInfo = new FieldInfo("_soft_deletes", 1, false, false, false, IndexOptions.NONE, DocValuesType.NUMERIC, 0, Collections.emptyMap(), 0, 0);
     List<Integer> docsDeleted = Arrays.asList(1, 3, 7, 8, DocIdSetIterator.NO_MORE_DOCS);
     List<DocValuesFieldUpdates> updates = Arrays.asList(singleUpdate(docsDeleted, 10));
@@ -152,10 +164,9 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
     assertEquals(1, reader.leaves().size());
     SegmentReader segmentReader = (SegmentReader) reader.leaves().get(0).reader();
     SegmentCommitInfo segmentInfo = segmentReader.getSegmentInfo();
-    SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 3, false, Codec.getDefault(),
-        Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
-    PendingSoftDeletes deletes = newPendingDeletes(segmentInfo);
-    FieldInfo fieldInfo = new FieldInfo("_soft_deletes", 1, false, false, false, IndexOptions.NONE, DocValuesType.NUMERIC, segmentInfo.getDocValuesGen(), Collections.emptyMap(), 0, 0);
+    PendingDeletes deletes = newPendingDeletes(segmentInfo);
+    deletes.onNewReader(segmentReader, segmentInfo);
+    FieldInfo fieldInfo = new FieldInfo("_soft_deletes", 1, false, false, false, IndexOptions.NONE, DocValuesType.NUMERIC, segmentInfo.getNextDocValuesGen(), Collections.emptyMap(), 0, 0);
     List<Integer> docsDeleted = Arrays.asList(1, DocIdSetIterator.NO_MORE_DOCS);
     List<DocValuesFieldUpdates> updates = Arrays.asList(singleUpdate(docsDeleted, 3));
     deletes.onDocValuesUpdate(fieldInfo, updates);


[05/46] lucene-solr:jira/solr-11833: SOLR-12214: Leader may skip publishing itself as ACTIVE even when its last published state is DOWN

Posted by ab...@apache.org.
SOLR-12214: Leader may skip publishing itself as ACTIVE even when its last published state is DOWN
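A hedged sketch of the condition this fix introduces (illustrative enum and method names, not the actual Solr classes): the newly elected leader re-publishes ACTIVE when either the cluster-state view or its own last published state is not ACTIVE, so a core whose last published state was DOWN no longer skips the publish.

public class LeaderPublishSketch {
  enum State { ACTIVE, DOWN, RECOVERING }

  /** Publish ACTIVE if either the cluster-state view or the core's last published state is not ACTIVE. */
  static boolean shouldPublishActive(State clusterStateView, State lastPublished) {
    // Before the fix only the cluster-state view was consulted, so a leader whose
    // last published state was DOWN could skip re-publishing itself as ACTIVE.
    return clusterStateView != State.ACTIVE || lastPublished != State.ACTIVE;
  }

  public static void main(String[] args) {
    // The bug scenario: cluster state already says ACTIVE, but the core last published DOWN.
    System.out.println(shouldPublishActive(State.ACTIVE, State.DOWN)); // true -> re-publish ACTIVE
  }
}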


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/11d54b0c
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/11d54b0c
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/11d54b0c

Branch: refs/heads/jira/solr-11833
Commit: 11d54b0cc4fd1df567afe9f4690e0f5a8a55f1ab
Parents: 8d20fc5
Author: Cao Manh Dat <da...@apache.org>
Authored: Thu Apr 12 12:28:00 2018 +0700
Committer: Cao Manh Dat <da...@apache.org>
Committed: Thu Apr 12 12:28:00 2018 +0700

----------------------------------------------------------------------
 solr/CHANGES.txt                                 |  1 +
 .../org/apache/solr/cloud/ElectionContext.java   |  3 ++-
 .../org/apache/solr/cloud/RecoveryStrategy.java  |  2 +-
 .../solr/cloud/LeaderVoteWaitTimeoutTest.java    | 19 ++++++++++---------
 4 files changed, 14 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/11d54b0c/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 360d663..da6642a 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -142,6 +142,7 @@ Bug Fixes
 
 * SOLR-12190: Need to properly escape output in GraphMLResponseWriter. (yonik)
 
+* SOLR-12214: Leader may skip publish itself as ACTIVE when its last published state is DOWN (Cao Manh Dat)
  
 Optimizations
 ----------------------

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/11d54b0c/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
index 7d64717..ae5c452 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
@@ -580,7 +580,8 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
         zkStateReader.forceUpdateCollection(collection);
         ClusterState clusterState = zkStateReader.getClusterState();
         Replica rep = getReplica(clusterState, collection, leaderProps.getStr(ZkStateReader.CORE_NODE_NAME_PROP));
-        if (rep != null && rep.getState() != Replica.State.ACTIVE) {
+        if (rep == null) return;
+        if (rep.getState() != Replica.State.ACTIVE || core.getCoreDescriptor().getCloudDescriptor().getLastPublished() != Replica.State.ACTIVE) {
           log.debug("We have become the leader after core registration but are not in an ACTIVE state - publishing ACTIVE");
           zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE);
         }
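
Read together with the context lines, the fix makes the leader re-publish ACTIVE whenever either
the cluster state or the core's own last-published state is not ACTIVE. A hedged paraphrase of
the patched check, reusing the names that appear in the hunk above (a sketch, not a drop-in
snippet):

  Replica rep = getReplica(clusterState, collection, leaderProps.getStr(ZkStateReader.CORE_NODE_NAME_PROP));
  if (rep == null) return;   // replica no longer present in cluster state; nothing to publish
  boolean clusterStateActive = rep.getState() == Replica.State.ACTIVE;
  boolean lastPublishedActive =
      core.getCoreDescriptor().getCloudDescriptor().getLastPublished() == Replica.State.ACTIVE;
  if (!clusterStateActive || !lastPublishedActive) {
    // Before this change only the cluster-state check existed, so a leader whose last
    // published state was DOWN could skip re-publishing itself as ACTIVE.
    zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE);
  }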

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/11d54b0c/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
index 023b2e3..c8f5ae8 100644
--- a/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
+++ b/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
@@ -749,7 +749,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
     while (true) {
       CloudDescriptor cloudDesc = coreDesc.getCloudDescriptor();
       DocCollection docCollection = zkStateReader.getClusterState().getCollection(cloudDesc.getCollectionName());
-      if (mayPutReplicaAsDown && numTried == 1 &&
+      if (!isClosed() && mayPutReplicaAsDown && numTried == 1 &&
           docCollection.getReplica(coreDesc.getCloudDescriptor().getCoreNodeName()).getState() == Replica.State.ACTIVE) {
         // this operation may take a long time, by putting replica into DOWN state, client won't query this replica
         zkController.publish(coreDesc, Replica.State.DOWN);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/11d54b0c/solr/core/src/test/org/apache/solr/cloud/LeaderVoteWaitTimeoutTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderVoteWaitTimeoutTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderVoteWaitTimeoutTest.java
index c9eb336..6d32c3a 100644
--- a/solr/core/src/test/org/apache/solr/cloud/LeaderVoteWaitTimeoutTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/LeaderVoteWaitTimeoutTest.java
@@ -90,7 +90,6 @@ public class LeaderVoteWaitTimeoutTest extends SolrCloudTestCase {
   }
 
   @Test
-  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 17-Mar-2018
   public void basicTest() throws Exception {
     final String collectionName = "basicTest";
     CollectionAdminRequest.createCollection(collectionName, 1, 1)
@@ -127,7 +126,6 @@ public class LeaderVoteWaitTimeoutTest extends SolrCloudTestCase {
   }
 
   @Test
-  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 17-Mar-2018
   public void testMostInSyncReplicasCanWinElection() throws Exception {
     final String collectionName = "collection1";
     CollectionAdminRequest.createCollection(collectionName, 1, 3)
@@ -187,15 +185,18 @@ public class LeaderVoteWaitTimeoutTest extends SolrCloudTestCase {
     proxies.get(cluster.getJettySolrRunner(2)).reopen();
     cluster.getJettySolrRunner(0).stop();
 
-    // even replica2 joined election at the end of the queue, but it is the one with highest term
-    waitForState("Timeout waiting for new leader", collectionName, new CollectionStatePredicate() {
-      @Override
-      public boolean matches(Set<String> liveNodes, DocCollection collectionState) {
+    try {
+      // even replica2 joined election at the end of the queue, but it is the one with highest term
+      waitForState("Timeout waiting for new leader", collectionName, (liveNodes, collectionState) -> {
         Replica newLeader = collectionState.getSlice("shard1").getLeader();
         return newLeader.getName().equals(replica2.getName());
-      }
-    });
-
+      });
+    } catch (Exception e) {
+      List<String> children = zkClient().getChildren("/collections/"+collectionName+"/leader_elect/shard1/election",
+          null, true);
+      LOG.info("{} election nodes:{}", collectionName, children);
+      throw e;
+    }
     cluster.getJettySolrRunner(0).start();
     proxies.get(cluster.getJettySolrRunner(0)).reopen();
 


[04/46] lucene-solr:jira/solr-11833: SOLR-12190: properly escape output in GraphMLResponseWriter

Posted by ab...@apache.org.
SOLR-12190: properly escape output in GraphMLResponseWriter


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/8d20fc57
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/8d20fc57
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/8d20fc57

Branch: refs/heads/jira/solr-11833
Commit: 8d20fc575bab2c4d0353bb34c6dc66566290f094
Parents: 8927d46
Author: yonik <yo...@apache.org>
Authored: Wed Apr 11 22:57:34 2018 -0400
Committer: yonik <yo...@apache.org>
Committed: Wed Apr 11 23:00:03 2018 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                      |  3 +++
 .../apache/solr/response/GraphMLResponseWriter.java   | 14 +++++++-------
 2 files changed, 10 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d20fc57/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index c07c1f7..360d663 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -139,6 +139,9 @@ Bug Fixes
 
 * SOLR-12201: TestReplicationHandler.doTestIndexFetchOnMasterRestart(): handle unexpected replication failures
   (Steve Rowe)
+
+* SOLR-12190: Need to properly escape output in GraphMLResponseWriter. (yonik)
+
  
 Optimizations
 ----------------------

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8d20fc57/solr/core/src/java/org/apache/solr/response/GraphMLResponseWriter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/response/GraphMLResponseWriter.java b/solr/core/src/java/org/apache/solr/response/GraphMLResponseWriter.java
index 7f2fac2..c41e2a2 100644
--- a/solr/core/src/java/org/apache/solr/response/GraphMLResponseWriter.java
+++ b/solr/core/src/java/org/apache/solr/response/GraphMLResponseWriter.java
@@ -97,7 +97,7 @@ public class GraphMLResponseWriter implements QueryResponseWriter {
           id = tuple.getString("collection") + "." + id;
         }
 
-        writer.write("<node id=\""+replace(id)+"\"");
+        printWriter.write("<node id=\""+ xmlEscape(id)+"\"");
 
         List<String> outfields = new ArrayList();
         Iterator<String> keys = tuple.fields.keySet().iterator();
@@ -115,7 +115,7 @@ public class GraphMLResponseWriter implements QueryResponseWriter {
           for (String nodeAttribute : outfields) {
             Object o = tuple.get(nodeAttribute);
             if (o != null) {
-              printWriter.println("<data key=\""+nodeAttribute+"\">" + o.toString() + "</data>");
+              printWriter.println("<data key=\"" + xmlEscape(nodeAttribute) + "\">" + xmlEscape(o.toString()) + "</data>");
             }
           }
           printWriter.println("</node>");
@@ -128,20 +128,20 @@ public class GraphMLResponseWriter implements QueryResponseWriter {
         if(ancestors != null) {
           for (String ancestor : ancestors) {
             ++edgeCount;
-            writer.write("<edge id=\"" + edgeCount + "\" ");
-            writer.write(" source=\"" + replace(ancestor) + "\" ");
-            printWriter.println(" target=\"" + replace(id) + "\"/>");
+            printWriter.write("<edge id=\"" + edgeCount + "\" ");
+            printWriter.write(" source=\"" + xmlEscape(ancestor) + "\" ");
+            printWriter.println(" target=\"" + xmlEscape(id) + "\"/>");
           }
         }
       }
 
-      writer.write("</graph></graphml>");
+      printWriter.write("</graph></graphml>");
     } finally {
       stream.close();
     }
   }
 
-  private String replace(String s) {
+  private String xmlEscape(String s) {
     if(s.indexOf(">") > -1) {
       s = s.replace(">", "&gt;");
     }
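
The hunk above cuts off after the ">" branch of xmlEscape. For reference, a hedged sketch of a
complete XML text/attribute escaper (illustrative only; the method actually committed may handle
a different set of characters):

  private static String xmlEscape(String s) {
    // '&' must be handled first so that entities produced by the later replacements
    // are not escaped a second time.
    if (s.indexOf('&') > -1)  s = s.replace("&", "&amp;");
    if (s.indexOf('<') > -1)  s = s.replace("<", "&lt;");
    if (s.indexOf('>') > -1)  s = s.replace(">", "&gt;");
    if (s.indexOf('"') > -1)  s = s.replace("\"", "&quot;");
    return s;
  }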


[39/46] lucene-solr:jira/solr-11833: SOLR-11731: one more decimal place (8) and we get the target/theoretical precision

Posted by ab...@apache.org.
SOLR-11731: one more decimal place (8) and we get the target/theoretical precision


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e4eb8a87
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e4eb8a87
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e4eb8a87

Branch: refs/heads/jira/solr-11833
Commit: e4eb8a870c342b49c8f0a0864d34617cac1deb0f
Parents: e2e89d1
Author: David Smiley <ds...@apache.org>
Authored: Fri Apr 13 17:05:51 2018 -0400
Committer: David Smiley <ds...@apache.org>
Committed: Fri Apr 13 17:05:51 2018 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                    |  2 +-
 .../apache/solr/schema/LatLonPointSpatialField.java | 15 +++++++++------
 .../org/apache/solr/search/TestSolr4Spatial2.java   | 16 ++++++++--------
 3 files changed, 18 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e4eb8a87/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 399a2a7..de748be 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -165,7 +165,7 @@ Optimizations
   a few segments diverge. (Ishan Chattopadhyaya, Shaun Sabo, John Gallagher)
 
 * SOLR-11731: LatLonPointSpatialField can now decode points from docValues when stored=false docValues=true,
-  albeit with maximum precision of 1.33cm (Karthik Ramachandran, David Smiley)
+  albeit with maximum precision of 1.04cm (Karthik Ramachandran, David Smiley)
 
 * SOLR-11891: DocStreamer now respects the ReturnFields when populating a SolrDocument, reducing the
   number of unneccessary fields a ResponseWriter will see if documentCache is used (wei wang, hossman)
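
Where the 1.04cm figure comes from, roughly: latitude and longitude are each quantized to 32
bits, so the worst-case errors are about 4.19e-8 degrees (latitude) and 8.38e-8 degrees
(longitude); combining the two and converting degrees to distance lands just over a centimeter.
A hedged back-of-the-envelope check (flat approximation, not the exact spatial4j great-circle
calculation used in the test below):

  double latErrDeg = 4.190951585769653E-8;   // 180 / 2^32
  double lonErrDeg = 8.381903171539307E-8;   // 360 / 2^32
  double errDeg = Math.sqrt(latErrDeg * latErrDeg + lonErrDeg * lonErrDeg);  // ~9.37e-8 degrees
  double errCm  = errDeg * 111.195 * 1000.0 * 100.0;   // degrees -> km -> m -> cm
  System.out.println(errCm);                            // ~1.042, matching the test's bound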

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e4eb8a87/solr/core/src/java/org/apache/solr/schema/LatLonPointSpatialField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/LatLonPointSpatialField.java b/solr/core/src/java/org/apache/solr/schema/LatLonPointSpatialField.java
index f612a4a..dd3f586 100644
--- a/solr/core/src/java/org/apache/solr/schema/LatLonPointSpatialField.java
+++ b/solr/core/src/java/org/apache/solr/schema/LatLonPointSpatialField.java
@@ -19,6 +19,7 @@ package org.apache.solr.schema;
 
 import java.io.IOException;
 import java.math.BigDecimal;
+import java.math.RoundingMode;
 import java.util.Objects;
 
 import org.apache.lucene.document.Field;
@@ -53,7 +54,7 @@ import static java.math.RoundingMode.CEILING;
 /**
  * A spatial implementation based on Lucene's {@code LatLonPoint} and {@code LatLonDocValuesField}. The
  * first is based on Lucene's "Points" API, which is a BKD Index.  This field type is strictly limited to
- * coordinates in lat/lon decimal degrees.  The accuracy is about a centimeter.
+ * coordinates in lat/lon decimal degrees.  The accuracy is about a centimeter (1.042cm).
  */
 // TODO once LLP & LLDVF are out of Lucene Sandbox, we should be able to javadoc reference them.
 public class LatLonPointSpatialField extends AbstractSpatialFieldType implements SchemaAware {
@@ -81,16 +82,18 @@ public class LatLonPointSpatialField extends AbstractSpatialFieldType implements
    * The encoding is governed by {@code LatLonDocValuesField}.  The decimal output representation is reflective
    * of the available precision.
    * @param value Non-null; stored location field data
-   * @return Non-null; "lat, lon" with 6 decimal point precision
+   * @return Non-null; "lat, lon"
    */
   public static String decodeDocValueToString(long value) {
     final double latDouble = GeoEncodingUtils.decodeLatitude((int) (value >> 32));
     final double lonDouble = GeoEncodingUtils.decodeLongitude((int) (value & 0xFFFFFFFFL));
-    // 7 decimal places maximizes our available precision to just over a centimeter; we have a test for it.
+    // This # decimal places maximizes our available precision to just over a centimeter; we have a test for it.
     // CEILING round-trips (decode then re-encode then decode to get identical results). Others did not. It also
-    //   reverses the "floor" that occurs when we encode.
-    BigDecimal latitudeDecoded = BigDecimal.valueOf(latDouble).setScale(7, CEILING);
-    BigDecimal longitudeDecoded = BigDecimal.valueOf(lonDouble).setScale(7, CEILING);
+    //   reverses the "floor" that occurred when we encoded.
+    final int DECIMAL_PLACES = 8;
+    final RoundingMode ROUND_MODE = CEILING;
+    BigDecimal latitudeDecoded = BigDecimal.valueOf(latDouble).setScale(DECIMAL_PLACES, ROUND_MODE);
+    BigDecimal longitudeDecoded = BigDecimal.valueOf(lonDouble).setScale(DECIMAL_PLACES, ROUND_MODE);
     return latitudeDecoded.stripTrailingZeros().toPlainString() + ","
         + longitudeDecoded.stripTrailingZeros().toPlainString();
     // return ((float)latDouble) + "," + ((float)lonDouble);  crude but not quite as accurate
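
To see why CEILING at 8 decimal places round-trips, here is a hedged sketch (assumed test
scaffolding built on Lucene's GeoEncodingUtils; the sample latitude is arbitrary) that re-encodes
the printed value and checks that it lands back on the same encoded integer:

  // Uses org.apache.lucene.geo.GeoEncodingUtils, java.math.BigDecimal, java.math.RoundingMode.
  int encoded = GeoEncodingUtils.encodeLatitude(52.5200066);
  double decoded = GeoEncodingUtils.decodeLatitude(encoded);
  String printed = BigDecimal.valueOf(decoded)
      .setScale(8, RoundingMode.CEILING)
      .stripTrailingZeros()
      .toPlainString();
  // Rounding up by less than 1e-8 stays inside the ~4.19e-8-degree quantization cell,
  // so re-encoding the printed value should give back the original encoded int.
  assert GeoEncodingUtils.encodeLatitude(Double.parseDouble(printed)) == encoded;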

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e4eb8a87/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
index 854e9c7..df1c8cd 100644
--- a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
+++ b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java
@@ -145,12 +145,12 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
     String ptOrig = GeoTestUtil.nextLatitude() + "," + GeoTestUtil.nextLongitude();
     assertU(adoc("id", "0", fld, ptOrig));
     assertU(commit());
-    // retrieve it (probably less precision
+    // retrieve it (probably less precision)
     String ptDecoded1 = (String) client.query(params("q", "id:0")).getResults().get(0).get(fld);
     // now write it back
     assertU(adoc("id", "0", fld, ptDecoded1));
     assertU(commit());
-    // retrieve it and hopefully the same
+    // retrieve it; assert that it's the same as written
     String ptDecoded2 = (String) client.query(params("q", "id:0")).getResults().get(0).get(fld);
     assertEquals("orig:" + ptOrig, ptDecoded1, ptDecoded2);
 
@@ -158,13 +158,13 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
     final Point ptOrigObj = SpatialUtils.parsePoint(ptOrig, SpatialContext.GEO);
     final Point ptDecodedObj = SpatialUtils.parsePoint(ptDecoded1, SpatialContext.GEO);
     double deltaCentimeters = SpatialContext.GEO.calcDistance(ptOrigObj, ptDecodedObj) * DistanceUtils.DEG_TO_KM * 1000.0 * 100.0;
-//    //See javadocs of LatLonDocValuesField
-//    final Point absErrorPt = SpatialContext.GEO.getShapeFactory().pointXY(8.381903171539307E-8, 4.190951585769653E-8);
-//    double deltaCentimetersMax
-//        = SpatialContext.GEO.calcDistance(absErrorPt, 0,0) * DistanceUtils.DEG_TO_KM * 1000.0 * 100.0;
-//    //  equals 1.0420371840922256   which is a bit lower than what we're able to do
+    //See javadocs of LatLonDocValuesField for these constants
+    final Point absErrorPt = SpatialContext.GEO.getShapeFactory().pointXY(8.381903171539307E-8, 4.190951585769653E-8);
+    double deltaCentimetersMax
+        = SpatialContext.GEO.calcDistance(absErrorPt, 0,0) * DistanceUtils.DEG_TO_KM * 1000.0 * 100.0;
+    assertEquals(1.0420371840922256, deltaCentimetersMax, 0.0);// just so that we see it in black & white in the test
 
-    assertTrue("deltaCm too high: " + deltaCentimeters, deltaCentimeters < 1.33);
+    assertTrue("deltaCm too high: " + deltaCentimeters, deltaCentimeters <= deltaCentimetersMax);
   }
 
   @Test