You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2011/03/15 22:35:35 UTC

svn commit: r1081952 [11/17] - in /lucene/dev/branches/bulkpostings: ./ dev-tools/ dev-tools/eclipse/ dev-tools/idea/ dev-tools/idea/.idea/ dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ant/ dev-tools/idea/lucene/contrib/demo/ dev-tools/idea/luc...

Modified: lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java Tue Mar 15 21:35:17 2011
@@ -19,24 +19,21 @@ package org.apache.lucene.collation;
 
 
 import com.ibm.icu.text.Collator;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.Tokenizer;
-
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 import org.apache.lucene.collation.CollationKeyAnalyzer; // javadocs
+import org.apache.lucene.util.IndexableBinaryStringTools; // javadocs
+import org.apache.lucene.util.Version;
 
 import java.io.Reader;
-import java.io.IOException;
-
 
 /**
  * <p>
  *   Filters {@link KeywordTokenizer} with {@link ICUCollationKeyFilter}.
  * <p>
  *   Converts the token into its {@link com.ibm.icu.text.CollationKey}, and
- *   then encodes the CollationKey with 
- *   {@link org.apache.lucene.util.IndexableBinaryStringTools}, to allow it to
+ *   then encodes the CollationKey either directly or with 
+ *   {@link IndexableBinaryStringTools} (see <a href="#version">below</a>), to allow it to
  *   be stored as an index term.
  * </p>
  * <p>
@@ -70,39 +67,48 @@ import java.io.IOException;
  *   generation timing and key length comparisons between ICU4J and
  *   java.text.Collator over several languages.
  * </p>
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating ICUCollationKeyAnalyzer:
+ * <ul>
+ *   <li> As of 4.0, Collation Keys are directly encoded as bytes. Previous
+ *   versions will encode the bytes with {@link IndexableBinaryStringTools}.
+ * </ul>
  */
-public final class ICUCollationKeyAnalyzer extends Analyzer {
-  private Collator collator;
-
-  public ICUCollationKeyAnalyzer(Collator collator) {
+public final class ICUCollationKeyAnalyzer extends ReusableAnalyzerBase {
+  private final Collator collator;
+  private final ICUCollationAttributeFactory factory;
+  private final Version matchVersion;
+
+  /**
+   * Create a new ICUCollationKeyAnalyzer, using the specified collator.
+   * 
+   * @param matchVersion See <a href="#version">above</a>
+   * @param collator CollationKey generator
+   */
+  public ICUCollationKeyAnalyzer(Version matchVersion, Collator collator) {
+    this.matchVersion = matchVersion;
     this.collator = collator;
+    this.factory = new ICUCollationAttributeFactory(collator);
   }
 
-  @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new KeywordTokenizer(reader);
-    result = new ICUCollationKeyFilter(result, collator);
-    return result;
-  }
-  
-  private class SavedStreams {
-    Tokenizer source;
-    TokenStream result;
+  /**
+   * @deprecated Use {@link ICUCollationKeyAnalyzer#ICUCollationKeyAnalyzer(Version, Collator)}
+   *   and specify a version instead. This ctor will be removed in Lucene 5.0
+   */
+  @Deprecated
+  public ICUCollationKeyAnalyzer(Collator collator) {
+    this(Version.LUCENE_31, collator);
   }
-  
+
   @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader) 
-    throws IOException {
-    
-    SavedStreams streams = (SavedStreams)getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      streams.source = new KeywordTokenizer(reader);
-      streams.result = new ICUCollationKeyFilter(streams.source, collator);
-      setPreviousTokenStream(streams);
+  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+    if (matchVersion.onOrAfter(Version.LUCENE_40)) {
+      KeywordTokenizer tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
+      return new TokenStreamComponents(tokenizer, tokenizer);
     } else {
-      streams.source.reset(reader);
+      KeywordTokenizer tokenizer = new KeywordTokenizer(reader);
+      return new TokenStreamComponents(tokenizer, new ICUCollationKeyFilter(tokenizer, collator));
     }
-    return streams.result;
   }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java Tue Mar 15 21:35:17 2011
@@ -68,7 +68,10 @@ import java.io.IOException;
  *   generation timing and key length comparisons between ICU4J and
  *   java.text.Collator over several languages.
  * </p>
+ *  @deprecated Use {@link ICUCollationAttributeFactory} instead, which encodes
+ *  terms directly as bytes. This filter will be removed in Lucene 5.0
  */
+@Deprecated
 public final class ICUCollationKeyFilter extends TokenFilter {
   private Collator collator = null;
   private RawCollationKey reusableKey = new RawCollationKey();
@@ -81,7 +84,12 @@ public final class ICUCollationKeyFilter
    */
   public ICUCollationKeyFilter(TokenStream input, Collator collator) {
     super(input);
-    this.collator = collator;
+    // clone the collator: see http://userguide.icu-project.org/collation/architecture
+    try {
+      this.collator = (Collator) collator.clone();
+    } catch (CloneNotSupportedException e) {
+      throw new RuntimeException(e);
+    }
   }
 
   @Override

Modified: lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/overview.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/overview.html?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/overview.html (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/overview.html Tue Mar 15 21:35:17 2011
@@ -66,12 +66,12 @@ algorithm.
 </ul>
 <h2>Example Usages</h2>
 <h3>Tokenizing multilanguage text</h3>
-<code><pre>
+<pre class="prettyprint">
   /**
    * This tokenizer will work well in general for most languages.
    */
   Tokenizer tokenizer = new ICUTokenizer(reader);
-</pre></code>
+</pre>
 <hr/>
 <h1><a name="collation">Collation</a></h1>
 <p>
@@ -111,12 +111,11 @@ algorithm.
 <h2>Example Usages</h2>
 
 <h3>Farsi Range Queries</h3>
-<code><pre>
-  Collator collator = Collator.getInstance(new Locale("ar"));
-  ICUCollationKeyAnalyzer analyzer = new ICUCollationKeyAnalyzer(collator);
+<pre class="prettyprint">
+  Collator collator = Collator.getInstance(new ULocale("ar"));
+  ICUCollationKeyAnalyzer analyzer = new ICUCollationKeyAnalyzer(Version.LUCENE_40, collator);
   RAMDirectory ramDir = new RAMDirectory();
-  IndexWriter writer = new IndexWriter
-    (ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+  IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_40, analyzer));
   Document doc = new Document();
   doc.add(new Field("content", "\u0633\u0627\u0628", 
                     Field.Store.YES, Field.Index.ANALYZED));
@@ -124,12 +123,9 @@ algorithm.
   writer.close();
   IndexSearcher is = new IndexSearcher(ramDir, true);
 
-  // The AnalyzingQueryParser in Lucene's contrib allows terms in range queries
-  // to be passed through an analyzer - Lucene's standard QueryParser does not
-  // allow this.
-  AnalyzingQueryParser aqp = new AnalyzingQueryParser("content", analyzer);
-  aqp.setLowercaseExpandedTerms(false);
-  
+  QueryParser aqp = new QueryParser(Version.LUCENE_40, "content", analyzer);
+  aqp.setAnalyzeRangeTerms(true);
+    
   // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
   // orders the U+0698 character before the U+0633 character, so the single
   // indexed Term above should NOT be returned by a ConstantScoreRangeQuery
@@ -138,15 +134,14 @@ algorithm.
   ScoreDoc[] result
     = is.search(aqp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
   assertEquals("The index Term should not be included.", 0, result.length);
-</pre></code>
+</pre>
 
 <h3>Danish Sorting</h3>
-<code><pre>
+<pre class="prettyprint">
   Analyzer analyzer 
-    = new ICUCollationKeyAnalyzer(Collator.getInstance(new Locale("da", "dk")));
+    = new ICUCollationKeyAnalyzer(Version.LUCENE_40, Collator.getInstance(new ULocale("da", "dk")));
   RAMDirectory indexStore = new RAMDirectory();
-  IndexWriter writer = new IndexWriter 
-    (indexStore, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+  IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(Version.LUCENE_40, analyzer));
   String[] tracer = new String[] { "A", "B", "C", "D", "E" };
   String[] data = new String[] { "HAT", "HUT", "H\u00C5T", "H\u00D8T", "HOT" };
   String[] sortedTracerOrder = new String[] { "A", "E", "B", "D", "C" };
@@ -157,7 +152,7 @@ algorithm.
     writer.addDocument(doc);
   }
   writer.close();
-  Searcher searcher = new IndexSearcher(indexStore, true);
+  IndexSearcher searcher = new IndexSearcher(indexStore, true);
   Sort sort = new Sort();
   sort.setSort(new SortField("contents", SortField.STRING));
   Query query = new MatchAllDocsQuery();
@@ -166,26 +161,25 @@ algorithm.
     Document doc = searcher.doc(result[i].doc);
     assertEquals(sortedTracerOrder[i], doc.getValues("tracer")[0]);
   }
-</pre></code>
+</pre>
 
 <h3>Turkish Case Normalization</h3>
-<code><pre>
-  Collator collator = Collator.getInstance(new Locale("tr", "TR"));
+<pre class="prettyprint">
+  Collator collator = Collator.getInstance(new ULocale("tr", "TR"));
   collator.setStrength(Collator.PRIMARY);
-  Analyzer analyzer = new ICUCollationKeyAnalyzer(collator);
+  Analyzer analyzer = new ICUCollationKeyAnalyzer(Version.LUCENE_40, collator);
   RAMDirectory ramDir = new RAMDirectory();
-  IndexWriter writer = new IndexWriter
-    (ramDir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
+  IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_40, analyzer));
   Document doc = new Document();
   doc.add(new Field("contents", "DIGY", Field.Store.NO, Field.Index.ANALYZED));
   writer.addDocument(doc);
   writer.close();
   IndexSearcher is = new IndexSearcher(ramDir, true);
-  QueryParser parser = new QueryParser("contents", analyzer);
+  QueryParser parser = new QueryParser(Version.LUCENE_40, "contents", analyzer);
   Query query = parser.parse("d\u0131gy");   // U+0131: dotless i
   ScoreDoc[] result = is.search(query, null, 1000).scoreDocs;
   assertEquals("The index Term should be included.", 1, result.length);
-</pre></code>
+</pre>
 
 <h2>Caveats and Comparisons</h2>
 <p>
@@ -245,7 +239,7 @@ algorithm.
 </ul>
 <h2>Example Usages</h2>
 <h3>Normalizing text to NFC</h3>
-<code><pre>
+<pre class="prettyprint">
   /**
    * Normalizer2 objects are unmodifiable and immutable.
    */
@@ -254,7 +248,7 @@ algorithm.
    * This filter will normalize to NFC.
    */
   TokenStream tokenstream = new ICUNormalizer2Filter(tokenizer, normalizer);
-</pre></code>
+</pre>
 <hr/>
 <h1><a name="casefolding">Case Folding</a></h1>
 <p>
@@ -284,12 +278,12 @@ this integration. To perform case-foldin
 </ul>
 <h2>Example Usages</h2>
 <h3>Lowercasing text</h3>
-<code><pre>
+<pre class="prettyprint">
   /**
    * This filter will case-fold and normalize to NFKC.
    */
   TokenStream tokenstream = new ICUNormalizer2Filter(tokenizer);
-</pre></code>
+</pre>
 <hr/>
 <h1><a name="searchfolding">Search Term Folding</a></h1>
 <p>
@@ -311,13 +305,13 @@ many character foldings recursively.
 </ul>
 <h2>Example Usages</h2>
 <h3>Removing accents</h3>
-<code><pre>
+<pre class="prettyprint">
   /**
    * This filter will case-fold, remove accents and other distinctions, and
    * normalize to NFKC.
    */
   TokenStream tokenstream = new ICUFoldingFilter(tokenizer);
-</pre></code>
+</pre>
 <hr/>
 <h1><a name="transform">Text Transformation</a></h1>
 <p>
@@ -341,19 +335,19 @@ and 
 </ul>
 <h2>Example Usages</h2>
 <h3>Convert Traditional to Simplified</h3>
-<code><pre>
+<pre class="prettyprint">
   /**
    * This filter will map Traditional Chinese to Simplified Chinese
    */
   TokenStream tokenstream = new ICUTransformFilter(tokenizer, Transliterator.getInstance("Traditional-Simplified"));
-</pre></code>
+</pre>
 <h3>Transliterate Serbian Cyrillic to Serbian Latin</h3>
-  <code><pre>
+<pre class="prettyprint">
   /**
    * This filter will map Serbian Cyrillic to Serbian Latin according to BGN rules
    */
   TokenStream tokenstream = new ICUTransformFilter(tokenizer, Transliterator.getInstance("Serbian-Latin/BGN"));
-</pre></code>
+</pre>
 <hr/>
 <h1><a name="backcompat">Backwards Compatibility</a></h1>
 <p>
@@ -365,7 +359,7 @@ a specific Unicode Version by using a {@
 </p>
 <h2>Example Usages</h2>
 <h3>Restricting normalization to Unicode 5.0</h3>
-<code><pre>
+<pre class="prettyprint">
   /**
    * This filter will do NFC normalization, but will ignore any characters that
    * did not exist as of Unicode 5.0. Because of the normalization stability policy
@@ -377,6 +371,6 @@ a specific Unicode Version by using a {@
     set.freeze(); 
     FilteredNormalizer2 unicode50 = new FilteredNormalizer2(normalizer, set);
     TokenStream tokenstream = new ICUNormalizer2Filter(tokenizer, unicode50);
-</pre></code>
+</pre>
 </body>
 </html>

Modified: lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java Tue Mar 15 21:35:17 2011
@@ -20,6 +20,8 @@ package org.apache.lucene.collation;
 
 import com.ibm.icu.text.Collator;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.util.BytesRef;
 
 import java.util.Locale;
 
@@ -27,17 +29,23 @@ import java.util.Locale;
 public class TestICUCollationKeyAnalyzer extends CollationTestBase {
 
   private Collator collator = Collator.getInstance(new Locale("fa"));
-  private Analyzer analyzer = new ICUCollationKeyAnalyzer(collator);
+  private Analyzer analyzer = new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, collator);
 
-  private String firstRangeBeginning = encodeCollationKey
+  private BytesRef firstRangeBeginning = new BytesRef
     (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
-  private String firstRangeEnd = encodeCollationKey
+  private BytesRef firstRangeEnd = new BytesRef
     (collator.getCollationKey(firstRangeEndOriginal).toByteArray());
-  private String secondRangeBeginning = encodeCollationKey
+  private BytesRef secondRangeBeginning = new BytesRef
     (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray());
-  private String secondRangeEnd = encodeCollationKey
+  private BytesRef secondRangeEnd = new BytesRef
     (collator.getCollationKey(secondRangeEndOriginal).toByteArray());
-  
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    assumeFalse("preflex format only supports UTF-8 encoded bytes", "PreFlex".equals(CodecProvider.getDefault().getDefaultFieldCodec()));
+  }
+
   public void testFarsiRangeFilterCollating() throws Exception {
     testFarsiRangeFilterCollating(analyzer, firstRangeBeginning, firstRangeEnd, 
                                   secondRangeBeginning, secondRangeEnd);
@@ -62,13 +70,13 @@ public class TestICUCollationKeyAnalyzer
   //  
   public void testCollationKeySort() throws Exception {
     Analyzer usAnalyzer = new ICUCollationKeyAnalyzer
-      (Collator.getInstance(Locale.US));
+      (TEST_VERSION_CURRENT, Collator.getInstance(Locale.US));
     Analyzer franceAnalyzer = new ICUCollationKeyAnalyzer
-      (Collator.getInstance(Locale.FRANCE));
+      (TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE));
     Analyzer swedenAnalyzer = new ICUCollationKeyAnalyzer
-      (Collator.getInstance(new Locale("sv", "se")));
+      (TEST_VERSION_CURRENT, Collator.getInstance(new Locale("sv", "se")));
     Analyzer denmarkAnalyzer = new ICUCollationKeyAnalyzer
-      (Collator.getInstance(new Locale("da", "dk")));
+      (TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk")));
 
     // The ICU Collator and java.text.Collator implementations differ in their
     // orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US.
@@ -76,4 +84,14 @@ public class TestICUCollationKeyAnalyzer
     (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, 
      "BFJHD", "ECAGI", "BJDFH", "BJDHF");
   }
+  
+  public void testThreadSafe() throws Exception {
+    int iters = 20 * RANDOM_MULTIPLIER;
+    for (int i = 0; i < iters; i++) {
+      Locale locale = randomLocale(random);
+      Collator collator = Collator.getInstance(locale);
+      collator.setStrength(Collator.IDENTICAL);
+      assertThreadSafe(new ICUCollationKeyAnalyzer(TEST_VERSION_CURRENT, collator));
+    }
+  }
 }

Modified: lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java Tue Mar 15 21:35:17 2011
@@ -22,24 +22,26 @@ import com.ibm.icu.text.Collator;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.util.BytesRef;
 
 import java.io.Reader;
 import java.util.Locale;
 
-
+/** @deprecated remove this when ICUCollationKeyFilter is removed */
+@Deprecated
 public class TestICUCollationKeyFilter extends CollationTestBase {
 
   private Collator collator = Collator.getInstance(new Locale("fa"));
   private Analyzer analyzer = new TestAnalyzer(collator);
 
-  private String firstRangeBeginning = encodeCollationKey
-    (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
-  private String firstRangeEnd = encodeCollationKey
-    (collator.getCollationKey(firstRangeEndOriginal).toByteArray());
-  private String secondRangeBeginning = encodeCollationKey
-    (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray());
-  private String secondRangeEnd = encodeCollationKey
-    (collator.getCollationKey(secondRangeEndOriginal).toByteArray());
+  private BytesRef firstRangeBeginning = new BytesRef(encodeCollationKey
+    (collator.getCollationKey(firstRangeBeginningOriginal).toByteArray()));
+  private BytesRef firstRangeEnd = new BytesRef(encodeCollationKey
+    (collator.getCollationKey(firstRangeEndOriginal).toByteArray()));
+  private BytesRef secondRangeBeginning = new BytesRef(encodeCollationKey
+    (collator.getCollationKey(secondRangeBeginningOriginal).toByteArray()));
+  private BytesRef secondRangeEnd = new BytesRef(encodeCollationKey
+    (collator.getCollationKey(secondRangeEndOriginal).toByteArray()));
 
   
   public final class TestAnalyzer extends Analyzer {

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/CHANGES.txt?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/CHANGES.txt Tue Mar 15 21:35:17 2011
@@ -2,12 +2,21 @@ Lucene Benchmark Contrib Change Log
 
 The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
 
+03/10/2011
+  LUCENE-2961: Removed lib/xml-apis.jar, since JVM 1.5+ already contains the
+  JAXP 1.3 interface classes it provides.
+
 02/05/2011
   LUCENE-1540: Improvements to contrib.benchmark for TREC collections. 
   ContentSource can now process plain text files, gzip files, and bzip2 files.
   TREC doc parsing now handles the TREC gov2 collection and TREC disks 4&5-CR 
   collection (both used by many TREC tasks). (Shai Erera, Doron Cohen)
-  
+
+01/31/2011
+  LUCENE-1591: Rollback to xerces-2.9.1-patched-XERCESJ-1257.jar to workaround
+  XERCESJ-1257, which we hit on current Wikipedia XML export
+  (ENWIKI-20110115-pages-articles.xml) with xerces-2.10.0.jar.   (Mike McCandless)
+
 01/26/2011
   LUCENE-929: ExtractReuters first extracts to a tmp dir and then renames. That 
   way, if a previous extract attempt failed, "ant extract-reuters" will still 
@@ -33,7 +42,7 @@ The Benchmark contrib package contains c
 
 4/27/2010: WriteLineDocTask now supports multi-threading. Also, 
   StringBufferReader was renamed to StringBuilderReader and works on 
-  StringBuilder now. In addition, LongToEnglishCountentSource starts from 0
+  StringBuilder now. In addition, LongToEnglishContentSource starts from 0
   (instead of Long.MIN_VAL+10) and wraps around to MIN_VAL (if you ever hit 
   Long.MAX_VAL). (Shai Erera)
 

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/build.xml?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/build.xml (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/build.xml Tue Mar 15 21:35:17 2011
@@ -22,7 +22,6 @@
     <module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
       property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
     <contrib-uptodate name="memory" property="memory.uptodate" classpath.property="memory.jar"/>
-    <contrib-uptodate name="demo" property="demo.uptodate" classpath.property="demo.jar"/>
 
     <target name="check-files">
         <available file="temp/news20.tar.gz" property="news20.exists"/>
@@ -143,7 +142,6 @@
       <pathelement path="${memory.jar}"/>
       <pathelement path="${highlighter.jar}"/>
       <pathelement path="${analyzers-common.jar}"/>
-      <pathelement path="${demo.jar}"/>
       <path refid="base.classpath"/>
     	<fileset dir="lib">
     		<include name="**/*.jar"/>
@@ -232,11 +230,6 @@
       <echo>Benchmark output in JIRA table format is in file: ${shingle.jira.output.file}</echo>
     </target>
 
-    <target name="compile-demo" unless="demo.uptodate">
-      <subant target="default">
-         <fileset dir="${common.dir}/contrib/demo" includes="build.xml"/>
-      </subant>
-    </target>
     <target name="compile-highlighter" unless="highlighter.uptodate">
       <subant target="default">
          <fileset dir="${common.dir}/contrib/highlighter" includes="build.xml"/>
@@ -258,7 +251,22 @@
       </subant>
     </target>
 
-    <target name="init" depends="contrib-build.init,compile-demo,compile-memory,compile-highlighter,compile-analyzers-common"/>
+    <target name="init" depends="contrib-build.init,compile-memory,compile-highlighter,compile-analyzers-common"/>
   
-    <target name="dist-maven" depends="jar-core,javadocs,contrib-build.dist-maven"/>
+    <target name="clean-javacc">
+      <fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
+	<containsregexp expression="Generated.*By.*JavaCC"/>
+      </fileset>
+    </target>
+    
+    <target name="javacc" depends="init,javacc-check" if="javacc.present">
+      <invoke-javacc target="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj"
+                     outputDir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml"
+		     />
+    </target>
+    
+    <target name="dist-maven" depends="jar-core,javadocs,contrib-build.dist-maven">
+      <m2-deploy-with-pom-template pom.xml="lib/lucene-xercesImpl-pom.xml.template"
+                                   jar.file="lib/xercesImpl-2.9.1-patched-XERCESJ-1257.jar" />
+    </target>
 </project>

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java Tue Mar 15 21:35:17 2011
@@ -30,7 +30,7 @@ import java.util.Properties;
 public class DemoHTMLParser implements org.apache.lucene.benchmark.byTask.feeds.HTMLParser {
 
   public DocData parse(DocData docData, String name, Date date, String title, Reader reader, DateFormat dateFormat) throws IOException, InterruptedException {
-    org.apache.lucene.demo.html.HTMLParser p = new org.apache.lucene.demo.html.HTMLParser(reader);
+    org.apache.lucene.benchmark.byTask.feeds.demohtml.HTMLParser p = new org.apache.lucene.benchmark.byTask.feeds.demohtml.HTMLParser(reader);
     
     // title
     if (title==null) {

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java Tue Mar 15 21:35:17 2011
@@ -124,7 +124,7 @@ public class QualityQueriesFinder {
   
   private static class TermsDfQueue extends PriorityQueue<TermDf> {
     TermsDfQueue (int maxSize) {
-      initialize(maxSize);
+      super(maxSize);
     }
     @Override
     protected boolean lessThan(TermDf tf1, TermDf tf2) {

Modified: lucene/dev/branches/bulkpostings/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/CHANGES.txt?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/solr/CHANGES.txt Tue Mar 15 21:35:17 2011
@@ -28,9 +28,10 @@ Apache Lucene trunk
 Apache Tika 0.8-SNAPSHOT
 Carrot2 3.4.2
 Velocity 1.6.4 and Velocity Tools 2.0
+Apache UIMA 2.3.1-SNAPSHOT
 
 
-Upgrading from Solr 3.1-dev
+Upgrading from Solr 3.2-dev
 ----------------------
 
 * The Lucene index format has changed and as a result, once you upgrade, 
@@ -53,14 +54,6 @@ Upgrading from Solr 3.1-dev
   legacy behavior should set a default value for the 'mm' param in
   their solrconfig.xml file.
 
-* In previous releases, sorting on fields that are "multiValued" 
-  (either by explicit declaration in schema.xml or by implict behavior
-  because the "version" attribute on the schema was less then 1.2) did 
-  not generally work, but it would sometimes silently act as if it
-  succeeded and order the docs arbitrarily.  Solr will now fail on any
-  attempt to sort on a multivalued field
-
-
 
 Detailed Change List
 ----------------------
@@ -161,26 +154,11 @@ Bug Fixes
 * SOLR-2275: fix DisMax 'mm' parsing to be tolerant of whitespace
   (Erick Erickson via hossman)
 
-* SOLR-2307: fix bug in PHPSerializedResponseWriter (wt=phps) when
-  dealing with SolrDocumentList objects -- ie: sharded queries.
-  (Antonio Verni via hossman)
-  
-* SOLR-2127: Fixed serialization of default core and indentation of solr.xml when serializing.
-  (Ephraim Ofir, Mark Miller)
-
-* SOLR-482: Provide more exception handling in CSVLoader (gsingers)
-
-* SOLR-2320: Fixed ReplicationHandler detail reporting for masters
-  (hossman)
-
-* SOLR-2085: Improve SolrJ behavior when FacetComponent comes before
-  QueryComponent (Tomas Salfischberger via hossman)
-
-* SOLR-1940: Fix SolrDispatchFilter behavior when Content-Type is
-  unknown (Lance Norskog and hossman)
-
-* SOLR-2339: Fix sorting to explicitly generate an error if you
-  attempt to sort on a multiValued field. (hossman)
+* SOLR-309: Fix FieldType so setting an analyzer on a FieldType that
+  doesn't expect it will generate an error.  Practically speaking this
+  means that Solr will now correctly generate an error on
+  initialization if the schema.xml contains an analyzer configuration
+  for a fieldType that does not use TextField.  (hossman) 
 
 Other Changes
 ----------------------
@@ -203,9 +181,14 @@ Other Changes
   using Generics where applicable in method/object declarations, and
   adding @SuppressWarnings("unchecked") when appropriate (hossman)
 
-* SOLR-2350: Since Solr no longer requires XML files to be in UTF-8
-  (see SOLR-96) SimplePostTool (aka: post.jar) has been improved to
-  work with files of any mime-type or charset. (hossman)
+* SOLR-2375: Suggester Lookup implementations now store trie data
+  and load it back on init. This means that large tries don't have to be
+  rebuilt on every commit or core reload. (ab)
+
+* SOLR-2413: Support for returning multi-valued fields w/o <arr> tag 
+  in the XMLResponseWriter was removed.  XMLResponseWriter no 
+  longer works with "version" values less than 2.2 (ryan)
+
 
 Documentation
 ----------------------
@@ -213,6 +196,15 @@ Documentation
 * SOLR-2232: Improved README info on solr.solr.home in examples
   (Eric Pugh and hossman)
 
+==================  3.2.0-dev ==================
+Versions of Major Components
+---------------------
+Apache Lucene trunk
+Apache Tika 0.8-SNAPSHOT
+Carrot2 3.4.2
+
+(No Changes)
+
 ==================  3.1.0-dev ==================
 Versions of Major Components
 ---------------------
@@ -264,6 +256,19 @@ Upgrading from Solr 1.4
 
 * readercycle script was removed. (SOLR-2046)
 
+* In previous releases, sorting or evaluating function queries on 
+  fields that were "multiValued" (either by explicit declaration in
+  schema.xml or by implict behavior because the "version" attribute on
+  the schema was less than 1.2) did not generally work, but it would
+  sometimes silently act as if it succeeded and order the docs
+  arbitrarily.  Solr will now fail on any attempt to sort, or apply a
+  function to, multi-valued fields.
+
+* The DataImportHandler jars are no longer included in the solr
+  WAR and should be added in Solr's lib directory, or referenced
+  via the <lib> directive in solrconfig.xml.
+
+
 Detailed Change List
 ----------------------
 
@@ -279,7 +284,7 @@ New Features
 * SOLR-1553: New dismax parser implementation (accessible as "edismax")
   that supports full lucene syntax, improved reserved char escaping,
   fielded queries, improved proximity boosting, and improved stopword
-  handling. (yonik)
+  handling. Note: status is experimental for now. (yonik)
 
 * SOLR-1574: Add many new functions from java Math (e.g. sin, cos) (yonik)
 
@@ -393,7 +398,7 @@ New Features
 * SOLR-1240: "Range Faceting" has been added.  This is a generalization
   of the existing "Date Faceting" logic so that it now supports any
   all stock numeric field types that support range queries in addition
-  to dates.  
+  to dates. facet.date is now deprecated in favor of this generalized mechanism.
   (Gijs Kunze, hossman)
 
 * SOLR-2021: Add SolrEncoder plugin to Highlighter. (koji)
@@ -669,6 +674,27 @@ Bug Fixes
 * SOLR-2261: fix velocity template layout.vm that referred to an older
   version of jquery.  (Eric Pugh via rmuir)
 
+* SOLR-2307: fix bug in PHPSerializedResponseWriter (wt=phps) when
+  dealing with SolrDocumentList objects -- ie: sharded queries.
+  (Antonio Verni via hossman)
+
+* SOLR-2127: Fixed serialization of default core and indentation of solr.xml when serializing.
+  (Ephraim Ofir, Mark Miller)
+
+* SOLR-2320: Fixed ReplicationHandler detail reporting for masters
+  (hossman)
+
+* SOLR-482: Provide more exception handling in CSVLoader (gsingers)
+
+* SOLR-1283: HTMLStripCharFilter sometimes threw a "Mark Invalid" exception. 
+  (Julien Coloos, hossman, yonik)
+
+* SOLR-2085: Improve SolrJ behavior when FacetComponent comes before
+  QueryComponent (Tomas Salfischberger via hossman)
+
+* SOLR-1940: Fix SolrDispatchFilter behavior when Content-Type is
+  unknown (Lance Norskog and hossman)
+
 * SOLR-1983: snappuller fails when modifiedConfFiles is not empty and
   full copy of index is needed. (Alexander Kanarsky via yonik)
 
@@ -677,7 +703,36 @@ Bug Fixes
 
 * SOLR-96: Fix XML parsing in XMLUpdateRequestHandler and
   DocumentAnalysisRequestHandler to respect charset from XML file and only
-  use HTTP header's "Content-Type" as a "hint". (Uwe Schindler)
+  use HTTP header's "Content-Type" as a "hint". (uschindler)
+
+* SOLR-2339: Fix sorting to explicitly generate an error if you
+  attempt to sort on a multiValued field. (hossman)
+
+* SOLR-2348: Fix field types to explicitly generate an error if you
+  attempt to get a ValueSource for a multiValued field. (hossman)
+
+* SOLR-1191: resolve DataImportHandler deltaQuery column against pk when pk
+  has a prefix (e.g. pk="book.id" deltaQuery="select id from ..."). More
+  useful error reporting when no match found (previously failed with a
+  NullPointerException in log and no clear user feedback). (gthb via yonik)
+
+* SOLR-2380: Distributed faceting could miss values when facet.sort=index
+  and when facet.offset was greater than 0. (yonik)
+  
+* SOLR-1656: XIncludes and other HREFs in XML files loaded by ResourceLoader
+  are fixed to be resolved using the URI standard (RFC 2396). The system 
+  identifier is no longer a plain filename with path, it gets initialized 
+  using a custom URI scheme "solrres:". This scheme is resolved using a 
+  EntityResolver that utilizes ResourceLoader 
+  (org.apache.solr.common.util.SystemIdResolver). This makes all relative 
+  paths in Solr's config files behave as expected. This change 
+  introduces some backwards breaks in the API: Some config classes 
+  (Config, SolrConfig, IndexSchema) were changed to take 
+  org.xml.sax.InputSource instead of InputStream. There may also be some 
+  backwards breaks in existing config files, it is recommended to check 
+  your config files / XSLTs and replace all XIncludes/HREFs that were 
+  hacked to use absolute paths to use relative ones. (uschindler)
+
 
 Other Changes
 ----------------------
@@ -777,6 +832,31 @@ Other Changes
 * SOLR-2340: Add version infos to message in JavaBinCodec when throwing
   exception. (koji)
 
+* SOLR-2350: Since Solr no longer requires XML files to be in UTF-8
+  (see SOLR-96) SimplePostTool (aka: post.jar) has been improved to
+  work with files of any mime-type or charset. (hossman)
+
+* SOLR-2365: Move DIH jars out of solr.war (David Smiley via yonik)
+
+* SOLR-2381: Include a patched version of Jetty (6.1.26 + JETTY-1340) 
+  to fix problematic UTF-8 handling for supplementary characters.
+  (Bernd Fehling, uschindler, yonik, rmuir)
+
+* SOLR-2391: The preferred Content-Type for XML was changed to
+  application/xml. XMLResponseWriter now only delivers using this
+  type; updating documents and analyzing documents is still supported
+  using text/xml as Content-Type, too. If you have clients that are
+  hardcoded on text/xml as Content-Type, you have to change them.
+  (uschindler, rmuir)
+
+* SOLR-2414: All ResponseWriters now use only ServletOutputStreams
+  and wrap their own Writer around it when serializing. This fixes
+  the bug in PHPSerializedResponseWriter that produced wrong string
+  length if the servlet container had a broken UTF-8 encoding that was
+  in fact CESU-8 (see SOLR-1091). The system property to enable the
+  CESU-8 byte counting in PHPSerializesResponseWriters for broken
+  servlet containers was therefore removed and is now ignored if set.
+  Output is always UTF-8.  (uschindler, yonik, rmuir)
 
 Build
 ----------------------
@@ -793,7 +873,6 @@ Build
 * LUCENE-2657: Switch from using Maven POM templates to full POMs when
   generating Maven artifacts (Steven Rowe)
 
-
 Documentation
 ----------------------
 
@@ -911,8 +990,7 @@ New Features
  7. SOLR-680: Add StatsComponent. This gets simple statistics on matched numeric fields,
     including: min, max, mean, median, stddev.  (koji, ryan)
 
-    7.1 SOLR-1380: Added support for multi-valued fields to stats component
-        (Harish Agarwal via gsingers)
+    7.1 SOLR-1380: Added support for multi-valued fields (Harish Agarwal via gsingers)
 
  8. SOLR-561: Added Replication implemented in Java as a request handler. Supports index replication
     as well as configuration replication and exposes detailed statistics and progress information

Modified: lucene/dev/branches/bulkpostings/solr/LICENSE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/LICENSE.txt?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/LICENSE.txt (original)
+++ lucene/dev/branches/bulkpostings/solr/LICENSE.txt Tue Mar 15 21:35:17 2011
@@ -261,9 +261,9 @@ such code.
 1.13. You (or Your) means an individual or a legal entity exercising rights
 under, and complying with all of the terms of, this License. For legal
 entities, You includes any entity which controls, is controlled by, or is under
-common control with You. For purposes of this definition, control means (a)�the
+common control with You. For purposes of this definition, control means (a) the
 power, direct or indirect, to cause the direction or management of such entity,
-whether by contract or otherwise, or (b)�ownership of more than fifty percent
+whether by contract or otherwise, or (b) ownership of more than fifty percent
 (50%) of the outstanding shares or beneficial ownership of such entity.
 
 2. License Grants.
@@ -278,12 +278,12 @@ with or without Modifications, and/or as
 Patent Claims infringed by the making, using or selling of Original Software,
 to make, have made, use, practice, sell, and offer for sale, and/or otherwise
 dispose of the Original Software (or portions thereof).  (c) The licenses
-granted in Sections�2.1(a) and (b) are effective on the date Initial Developer
+granted in Sections 2.1(a) and (b) are effective on the date Initial Developer
 first distributes or otherwise makes the Original Software available to a third
-party under the terms of this License.  (d) Notwithstanding Section�2.1(b)
-above, no patent license is granted: (1)�for code that You delete from the
-Original Software, or (2)�for infringements caused by: (i)�the modification of
-the Original Software, or (ii)�the combination of the Original Software with
+party under the terms of this License.  (d) Notwithstanding Section 2.1(b)
+above, no patent license is granted: (1) for code that You delete from the
+Original Software, or (2) for infringements caused by: (i) the modification of
+the Original Software, or (ii) the combination of the Original Software with
 other software or devices.
 
 2.2. Contributor Grant.  Conditioned upon Your compliance with Section 3.1
@@ -297,17 +297,17 @@ and/or as part of a Larger Work; and (b)
 making, using, or selling of Modifications made by that Contributor either
 alone and/or in combination with its Contributor Version (or portions of such
 combination), to make, use, sell, offer for sale, have made, and/or otherwise
-dispose of: (1)�Modifications made by that Contributor (or portions thereof);
-and (2)�the combination of Modifications made by that Contributor with its
+dispose of: (1) Modifications made by that Contributor (or portions thereof);
+and (2) the combination of Modifications made by that Contributor with its
 Contributor Version (or portions of such combination).  (c) The licenses
-granted in Sections�2.2(a) and 2.2(b) are effective on the date Contributor
+granted in Sections 2.2(a) and 2.2(b) are effective on the date Contributor
 first distributes or otherwise makes the Modifications available to a third
-party.  (d) Notwithstanding Section�2.2(b) above, no patent license is granted:
-(1)�for any code that Contributor has deleted from the Contributor Version;
-(2)�for infringements caused by: (i)�third party modifications of Contributor
-Version, or (ii)�the combination of Modifications made by that Contributor with
+party.  (d) Notwithstanding Section 2.2(b) above, no patent license is granted:
+(1) for any code that Contributor has deleted from the Contributor Version;
+(2) for infringements caused by: (i) third party modifications of Contributor
+Version, or (ii) the combination of Modifications made by that Contributor with
 other software (except as part of the Contributor Version) or other devices; or
-(3)�under Patent Claims infringed by Covered Software in the absence of
+(3) under Patent Claims infringed by Covered Software in the absence of
 Modifications made by that Contributor.
 
 3. Distribution Obligations.
@@ -389,9 +389,9 @@ License published by the license steward
 
 When You are an Initial Developer and You want to create a new license for Your
 Original Software, You may create and use a modified version of this License if
-You: (a)�rename the license and remove any references to the name of the
+You: (a) rename the license and remove any references to the name of the
 license steward (except to note that the license differs from this License);
-and (b)�otherwise make it clear that the license contains terms which differ
+and (b) otherwise make it clear that the license contains terms which differ
 from this License.
 
 5. DISCLAIMER OF WARRANTY.
@@ -422,14 +422,14 @@ the Participant is a Contributor or the 
 is the Initial Developer) directly or indirectly infringes any patent, then any
 and all rights granted directly or indirectly to You by such Participant, the
 Initial Developer (if the Initial Developer is not the Participant) and all
-Contributors under Sections�2.1 and/or 2.2 of this License shall, upon 60 days
+Contributors under Sections 2.1 and/or 2.2 of this License shall, upon 60 days
 notice from Participant terminate prospectively and automatically at the
 expiration of such 60 day notice period, unless if within such 60 day period
 You withdraw Your claim with respect to the Participant Software against such
 Participant either unilaterally or pursuant to a written agreement with
 Participant.
 
-6.3. In the event of termination under Sections�6.1 or 6.2 above, all end user
+6.3. In the event of termination under Sections 6.1 or 6.2 above, all end user
 licenses that have been validly granted by You or any distributor hereunder
 prior to termination (excluding licenses granted to You by any distributor)
 shall survive termination.
@@ -453,9 +453,9 @@ LIMITATION MAY NOT APPLY TO YOU.
 8. U.S. GOVERNMENT END USERS.
 
 The Covered Software is a commercial item, as that term is defined in
-48�C.F.R.�2.101 (Oct. 1995), consisting of commercial computer software (as
-that term is defined at 48 C.F.R. �252.227-7014(a)(1)) and commercial computer
-software documentation as such terms are used in 48�C.F.R.�12.212 (Sept. 1995).
+48 C.F.R. 2.101 (Oct. 1995), consisting of commercial computer software (as
+that term is defined at 48 C.F.R. § 252.227-7014(a)(1)) and commercial computer
+software documentation as such terms are used in 48 C.F.R. 12.212 (Sept. 1995).
 Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4
 (June 1995), all U.S. Government End Users acquire Covered Software with only
 those rights set forth herein. This U.S. Government Rights clause is in lieu
@@ -534,7 +534,7 @@ ANY THEORY OF LIABILITY, WHETHER IN CONT
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 ==========================================================================
-The following license applies to easymock.jar
+The following license applies to easymock-2.2.jar
 --------------------------------------------------------------------------
 EasyMock 2 License (MIT License)
 Copyright (c) 2001-2007 OFFIS, Tammo Freese.

Modified: lucene/dev/branches/bulkpostings/solr/NOTICE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/NOTICE.txt?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/NOTICE.txt (original)
+++ lucene/dev/branches/bulkpostings/solr/NOTICE.txt Tue Mar 15 21:35:17 2011
@@ -1,6 +1,6 @@
 ==============================================================
  Apache Solr
- Copyright 2006-2010 The Apache Software Foundation
+ Copyright 2006-2011 The Apache Software Foundation
 ==============================================================
 
 This product includes software developed by
@@ -42,11 +42,108 @@ License: http://hsqldb.org/web/hsqlLicen
 =========================================================================
 ==  Apache Lucene Notice                                               ==
 =========================================================================
+Includes lib/servlet-api-2.4.jar from  Apache Tomcat
+Includes lib/ant-1.7.1.jar and lib/ant-junit-1.7.1.jar from Apache Ant
+Includes contrib/queries/lib/jakarta-regexp-1.4.jar from Apache Jakarta Regexp
+
+ICU4J (under contrib/icu) is licensed under an MIT-style license
+(contrib/icu/lib/ICU-LICENSE.txt) and Copyright (c) 1995-2008 
+International Business Machines Corporation and others
+
+Some data files (under contrib/icu/src/data) are derived from Unicode data such
+as the Unicode Character Database. See http://unicode.org/copyright.html for more
+details.
+
+Brics Automaton (under src/java/org/apache/lucene/util/automaton) is 
+BSD-licensed, created by Anders Møller. See http://www.brics.dk/automaton/
+
+The levenshtein automata tables (under src/java/org/apache/lucene/util/automaton) were
+automatically generated with the moman/finenight FSA library, created by
+Jean-Philippe Barrette-LaPierre. This library is available under an MIT license,
+see http://sites.google.com/site/rrettesite/moman and 
+http://bitbucket.org/jpbarrette/moman/overview/
+
+The class org.apache.lucene.SorterTemplate was inspired by CGLIB's class with
+the same name. The implementation part is mainly done using pre-existing
+Lucene sorting code. In-place stable mergesort was borrowed from CGLIB,
+which is Apache-licensed.
+
+The Google Code Prettify is Apache License 2.0.
+See http://code.google.com/p/google-code-prettify/
+
+JUnit (under lib/junit-4.7.jar) is licensed under the Common Public License v. 1.0
+See http://junit.sourceforge.net/cpl-v10.html
+
+JLine (under contrib/lucli/lib/jline.jar) is licensed under the BSD License.
+See http://jline.sourceforge.net/
+
+=========================================================================
+==  Apache Lucene Benchmark Notice                                     ==
+=========================================================================
+Includes software from other Apache Software Foundation projects,
+including, but not limited to:
+ - Commons Beanutils (lib/commons-beanutils-1.7.0.jar)
+ - Commons Collections (lib/commons-collections-3.1.jar)
+ - Commons Compress (lib/commons-compress-1.0.jar)
+ - Commons Digester (lib/commons-digester-1.7.jar)
+ - Commons Logging (lib/commons-logging-1.0.4.jar)
+ - Xerces (lib/xercesImpl-2.9.1-patched-XERCESJ-1257.jar)
+
+=========================================================================
+==  Apache Lucene Analyzers Notice                                     ==
+========================================================================= 
+Includes software from other Apache Software Foundation projects,
+including, but not limited to:
+  - Apache Commons
+
 The snowball stemmers in
-  contrib/snowball/src/java/net/sf/snowball
+  common/src/java/net/sf/snowball
+were developed by Martin Porter and Richard Boulton.
+The snowball stopword lists in
+  common/src/resources/org/apache/lucene/analysis/snowball
 were developed by Martin Porter and Richard Boulton.
 The full snowball package is available from
   http://snowball.tartarus.org/
+
+The Arabic, Persian, Romanian, Bulgarian, and Hindi analyzers (common) come with a default
+stopword list that is BSD-licensed created by Jacques Savoy.  These files reside in:
+common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt,
+common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt,
+common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt,
+common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt,
+common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt
+See http://members.unine.ch/jacques.savoy/clef/index.html.
+
+The German, Spanish, Finnish, French, Hungarian, Italian, Portuguese, Russian and Swedish light stemmers
+(common) are based on BSD-licensed reference implementations created by Jacques Savoy and
+Ljiljana Dolamic. These files reside in:
+common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java
+common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java
+common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java
+common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java
+common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java
+common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java
+common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java
+common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java
+common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java
+common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java
+common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java
+
+The Stempel analyzer (stempel) includes BSD-licensed software developed 
+by the Egothor project http://egothor.sf.net/, created by Leo Galambos, Martin Kvapil,
+and Edmond Nolan.
+
+The Polish analyzer (stempel) comes with a default
+stopword list that is BSD-licensed created by the Carrot2 project. The file resides
+in stempel/src/resources/org/apache/lucene/analysis/pl/stopwords.txt.
+See http://project.carrot2.org/license.html.
+
+The SmartChineseAnalyzer source code (smartcn) was
+provided by Xiaoping Gao and copyright 2009 by www.imdict.net.
+
+WordBreakTestUnicode_*.java (under modules/analysis/common/src/test/) 
+is derived from Unicode data such as the Unicode Character Database. 
+See http://unicode.org/copyright.html for more details.
 ---
 
 This product includes/uses software, Woodstox (http://woodstox.codehaus.org),
@@ -160,7 +257,7 @@ See http://project.carrot2.org/
 ==     Guava Notice                                                    ==
 =========================================================================
 
-Copyright ???? Google, Inc.
+Copyright (C) 2009 Google Inc.
 
 This product includes software developed by the Google Guava project.
 
@@ -170,7 +267,7 @@ See http://code.google.com/p/guava-libra
 ==     Prettify Notice                                                 ==
 =========================================================================
 
-Copyright ???? Google, Inc.
+Copyright (C) 2009 Google Inc.
 
 This product includes software developed by the Google Prettify project.
 
@@ -179,7 +276,7 @@ See http://code.google.com/p/google-code
 =========================================================================
 ==     Jackson Notice                                                  ==
 =========================================================================
-Copyright ????
+Copyright 2010 FasterXML, LLC
 
 This product includes software developed by the Jackson project.
 

Modified: lucene/dev/branches/bulkpostings/solr/README.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/README.txt?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/README.txt (original)
+++ lucene/dev/branches/bulkpostings/solr/README.txt Tue Mar 15 21:35:17 2011
@@ -17,8 +17,8 @@
 Welcome to the Apache Solr project!
 -----------------------------------
 
-Apache Solr is a search server based on the Apache Lucene search
-library. 
+Solr is the popular, blazing fast open source enterprise search platform
+from the Apache Lucene project.
 
 For a complete description of the Solr project, team composition, source
 code repositories, and other details, please see the Solr web site at
@@ -29,12 +29,20 @@ Getting Started
 ---------------
 
 See the "example" directory for an example Solr setup.  A tutorial
-using the example setup can be found in "docs/tutorial.html" 
+using the example setup can be found at
+http://lucene.apache.org/solr/tutorial.html
+or in "docs/tutorial.html" in a binary distribution.
 
 
 
-Files Included In Apache Solr Distributions
--------------------------------------------
+Files included in an Apache Solr binary distribution
+----------------------------------------------------
+
+example/
+  A self-contained example Solr instance, complete with a sample
+  configuration, documents to index, and the Jetty Servlet container.
+  Please see example/README.txt for information about running this
+  example.
 
 dist/apache-solr-XX.war
   The Apache Solr Application.  Deploy this WAR file to any servlet
@@ -45,21 +53,12 @@ dist/apache-solr-XX.jar
   Apache Solr Plugins (see http://wiki.apache.org/solr/SolrPlugins for
   more information).
 
-example/
-  A self-contained example Solr instance, complete with a sample
-  configuration, documents to index, and the Jetty Servlet container.
-  Please see example/README.txt for information about running this
-  example.
-
 docs/index.html
   The contents of the Apache Solr website.
   
 docs/api/index.html
   The Apache Solr Javadoc API documentation.
 
-src/
-  The Apache Solr source code.
-
 
 
 Instructions for Building Apache Solr from Source
@@ -82,7 +81,7 @@ Instructions for Building Apache Solr fr
 
      http://lucene.apache.org/solr/version_control.html
 
-4. Navigate to that folder and issue an "ant" command to see the available options
+4. Navigate to the "solr" folder and issue an "ant" command to see the available options
    for building, testing, and packaging Solr.
   
    NOTE: 

Modified: lucene/dev/branches/bulkpostings/solr/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/build.xml?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/build.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/build.xml Tue Mar 15 21:35:17 2011
@@ -30,7 +30,7 @@
   <property name="src" location="src" />
   <property name="web.xml" location="${src}/webapp/web/WEB-INF/web.xml"/>
 
-  <!-- Destination for distribution files (demo WAR, src distro, etc.) -->
+  <!-- Destination for distribution files (demo WAR, contrib JARs, etc.) -->
   <property name="dist" location="dist" />
 
   <property name="clover.db.dir" location="${dest}/tests/clover/db"/>
@@ -57,6 +57,7 @@
     <echo message="Use 'ant clean' to clean compiled files." />
     <echo message="Use 'ant compile' to compile the source code." />
     <echo message="Use 'ant dist' to build the project WAR and JAR files." />
+    <echo message="Use 'ant javadoc' to build javadoc under build/docs/api" />
     <echo message="Use 'ant generate-maven-artifacts' to generate maven artifacts." />
     <echo message="Use 'ant package' to generate zip, tgz for distribution." />
     <echo message="Use 'ant luke' to start luke.  see: http://luke.googlecode.com" />
@@ -68,12 +69,13 @@
           description="Cleans compiled files and other temporary artifacts.">
     <delete dir="${dest}" />
     <delete dir="${dist}" />
+    <delete dir="${package.dir}" />
     <delete dir="example/solr/lib" />
     <delete dir="${lucene-libs}" />
   </target>
 
   <target name="clean-dest"
-          description="Cleans out ${dest} but leaves ${dist} alone.  This allows us to run nightly and clover together in Hudson">
+          description="Cleans out ${dest} but leaves ${dist} and ${package.dir} alone.  This allows us to run nightly and clover together in Hudson">
     <delete includeemptydirs="true" >
       <fileset dir="${dest}">
         <exclude name="docs/"/>
@@ -156,7 +158,13 @@
     </solr-javac>
   </target>
 
-  <target name="javadoc-solrj" depends="compile-solrj" description="Generates solrj javadoc documentation.">
+  <target name="javadoc-dep">
+    <copy failonerror="false" todir="${build.docs}">
+      <fileset dir="site" />
+    </copy>
+  </target>
+
+  <target name="javadoc-solrj" depends="compile-solrj,javadoc-dep" description="Generates solrj javadoc documentation.">
     <sequential>
       <mkdir dir="${build.javadoc}/solrj"/>
 
@@ -175,7 +183,7 @@
     </sequential>
   </target>
   
-  <target name="javadoc-core" depends="compile"  description="Generates javadoc documentation for core.">
+  <target name="javadoc-core" depends="compile,javadoc-dep"  description="Generates javadoc documentation for core.">
 
     <sequential>
       <mkdir dir="${build.javadoc}/solr"/>
@@ -198,7 +206,7 @@
   </target>
 
 
-  <target name="javadoc-all" description="Generate javadoc for core, client and contrib">
+  <target name="javadoc-all" depends="compile,javadoc-dep" description="Generate javadoc for core, client and contrib">
     <sequential>
       <mkdir dir="${build.javadoc}"/>
 
@@ -234,7 +242,7 @@
     </sequential>
   </target>
 
-  <target name="javadoc-contrib" description="Generate javadoc for contrib classes" depends="build-contrib">
+  <target name="javadoc-contrib" description="Generate javadoc for contrib classes" depends="build-contrib,javadoc-dep">
     <contrib-crawl target="javadoc"
                    failonerror="true"/>
   </target>
@@ -379,7 +387,7 @@
   <!-- Run contrib unit tests. -->
   <target name="test"
         description="Runs the core unit tests."
-        depends="test-core, test-contrib" />
+        depends="test-core, test-contrib, test-jsp" />
 
   <target name="junit" depends="compileTests,junit-mkdir,junit-sequential,junit-parallel"/>
 
@@ -554,6 +562,37 @@
     </clover-report>
   </target>
 
+  <!-- ========================================================================= -->
+
+  <!-- Checks that all JSP files in the webapp compile successfully using Jetty's Jasper -->
+  <target name="test-jsp" depends="compile">
+    <property name="jsp.target" location="${dest}/jsp-temp" />
+    <taskdef classname="org.apache.jasper.JspC" name="jasper" >
+      <classpath>
+        <fileset dir="example/lib" includes="**/*.jar" />
+      </classpath>
+    </taskdef>
+    <delete dir="${jsp.target}" />
+    <mkdir dir="${jsp.target}" />
+    <jasper
+      uriroot="${src}/webapp/web"
+      outputDir="${jsp.target}" 
+      compile="false" 
+      verbose="1"
+      package="j"
+    />
+    <javac
+      srcdir="${jsp.target}"
+      destdir="${jsp.target}"
+      target="${java.compat.version}"
+      source="${java.compat.version}"
+      debug="off"
+      encoding="utf8"
+      includeAntRuntime="${javac.includeAntRuntime}"
+      classpathref="test.compile.classpath"
+    />
+  </target>
+
 
   <!-- ========================================================================= -->
   <!-- ===================== DISTRIBUTION-RELATED TASKS ======================== -->
@@ -568,7 +607,7 @@
   <!-- Creates the Solr WAR file. -->
   <target name="dist-war"
           description="Creates the Solr WAR Distribution file."
-          depends="compile, make-manifest, dist-jar, dist-solrj, lucene-jars-to-solr">
+          depends="compile, test-jsp, make-manifest, dist-jar, dist-solrj, lucene-jars-to-solr">
     <mkdir dir="${dist}" />
     <war destfile="${dist}/${fullnamever}.war"
          webxml="${web.xml}"
@@ -576,7 +615,7 @@
          manifest="${dest}/META-INF/MANIFEST.MF">
        <lib dir="${lib}">
          <exclude name="servlet-api*.jar" />
-         <exclude name="easymock.jar" />
+         <exclude name="easymock-*.jar" />
          <exclude name="junit-*.jar" />
          <exclude name="*.txt" />
          <exclude name="*.template" />
@@ -597,7 +636,7 @@
     </war>
   </target>
 
-  <target name="dist-src" description="Creates the Solr source distribution files"
+  <target name="dist-src" description="Creates the Solr source distribution files for maven"
           depends="make-manifest">
     <mkdir dir="${dist}" />
 
@@ -628,7 +667,7 @@
     </solr-jar>
   </target>
 
-  <target name="dist-javadoc" description="Creates the Solr javadoc distribution files"
+  <target name="dist-javadoc" description="Creates the Solr javadoc distribution files for maven"
           depends="make-manifest, javadoc">
     <mkdir dir="${dist}" />
 
@@ -725,55 +764,142 @@
   </target>
   
   <!-- make a distribution -->
-  <target name="package" depends="create-package"/>
+  <target name="package" depends="package-src,create-package"/>
 
-  <target name="create-package"
-          description="Packages the Solr Distribution files and Documentation."
-          depends="dist, example, javadoc, dist-src, dist-javadoc">
+  <!-- copied from lucene build.xml -->
+  <patternset id="src.dist.patterns"
+              includes="src/,build.xml,*build*.xml,docs/,*.txt,contrib/,*pom.xml*,lib/,backwards/"
+              excludes="contrib/db/*/lib/,contrib/*/ext-libs/,src/site/build/"
+  />
+
+  <target name="package-src"
+          description="Packages the Solr Source Distribution"
+          depends="dist">
+    <mkdir dir="${package.dir}"/>
 
-    <copy failonerror="false" todir="${build.docs}">
-      <fileset dir="site" />
-    </copy>
+    <delete includeemptydirs="true">
+      <fileset dir="${example}/work" includes="**/*"/>
+    </delete>
 
-    <delete file="${dist}/${fullnamever}.tgz" failonerror="false" />
-    <delete file="${dist}/${fullnamever}.zip" failonerror="false" />
+    <delete includeemptydirs="true" failonerror="false">
+      <fileset dir="${dest}/${fullnamever}" includes="**/*"/>
+    </delete>
+ 
+    <delete file="${package.dir}/${fullnamever}-src.tgz" failonerror="false" />
+    <delete file="${package.dir}/${fullnamever}-src.zip" failonerror="false" />
 
-    <tar destfile="${dist}/${fullnamever}.tgz" compression="gzip" longfile="gnu">
+    <tar destfile="${package.dir}/${fullnamever}-src.tgz" compression="gzip" longfile="gnu">
+      <tarfileset dir=".."
+        prefix="${fullnamever}"
+        includes="*.txt *.xml dev-tools/*" />
+      <tarfileset dir="." 
+        prefix="${fullnamever}"
+        includes="LICENSE.txt NOTICE.txt"/>
+      <tarfileset dir="."
+        prefix="${fullnamever}/solr"
+        includes="LICENSE.txt NOTICE.txt *.txt *.xml lib/** src/** example/** client/** contrib/"
+        excludes="${package.dir}/** ${dist}/** example/webapps/*.war lib/README.committers.txt **/data/ **/logs/* **/classes/ **/*.sh **/bin/ src/scripts/ src/site/build/ **/target/ client/python client/javascript contrib/**/build/ **/*.iml **/*.ipr **/*.iws" />
       <tarfileset dir="."
+        prefix="${fullnamever}/solr"
+        includes="src/test-files/solr/lib/classes/empty-file-main-lib.txt" />
+      <tarfileset dir="."
+        mode="755"
+        prefix="${fullnamever}/solr"
+        includes="**/*.sh **/bin/ src/scripts/" />
+      <tarfileset dir="../lucene" prefix="${fullnamever}/lucene">
+        <patternset refid="src.dist.patterns"/>
+      </tarfileset>
+      <tarfileset dir="../modules"
+        prefix="${fullnamever}/modules"
+        excludes="**/build/" />
+    </tar>
+    <solr-checksum file="${package.dir}/${fullnamever}-src.tgz"/>
+
+    <gunzip src="${package.dir}/${fullnamever}-src.tgz" dest="${dest}/${fullnamever}-src.tar"/>
+    <untar src="${dest}/${fullnamever}-src.tar" dest="${dest}"/>
+
+    <!--
+      This is a list of text file patterns to convert to CRLF line-ending style.
+      Shell scripts and files included in shell scripts should not be converted.
+      NB: The line-ending conversion process will mangle non-UTF8-encoded files.
+     -->
+    <fixcrlf srcdir="${dest}/${fullnamever}"
+       encoding="UTF-8"
+       eol="crlf"
+       includes="**/*.alg **/*.cgi **/*.cpp **/*.css **/*.csv **/*.dtd **/*.erb
+                 **/*.fcgi **/.htaccess **/*.htm **/*.html **/*.incl **/*.java
+                 **/*.javacc **/*.jflex **/*.jflex-macro **/*.jj **/*.js
+                 **/*.json **/*.jsp **/*LICENSE **/*.pl **/*.pom
+                 **/*pom.xml.template **/*.properties **/*.py **/*.rake
+                 **/Rakefile **/*.rb **/*.rbbi **/README **/README.* **/*.rhtml
+                 **/*.rslp **/*.rxml **/*.script **/*.svg **/*.tsv **/*.txt
+                 **/UPGRADING **/USAGE **/*.uxf **/*.vm **/*.xcat **/*.xml
+                 **/*.xsl **/*.xslt **/*.yml"
+       excludes="**/stopwordsWrongEncoding.txt **/gb18030-example.xml"
+     />
+
+    <zip destfile="${package.dir}/${fullnamever}-src.zip">
+      <zipfileset dir="${dest}/${fullnamever}"
+        prefix="${fullnamever}" 
+        excludes="**/*.sh **/bin/ src/scripts/" />
+      <zipfileset dir="${dest}/${fullnamever}"
         prefix="${fullnamever}"
-        includes="LICENSE.txt NOTICE.txt *.txt *.xml lucene-libs/** lib/** src/** example/** client/** contrib/"
-        excludes="lib/README.committers.txt **/data/ **/logs/* **/classes/ **/*.sh **/bin/ src/scripts/ src/site/build/ **/target/ client/ruby/flare/ client/python contrib/**/build/ **/*.iml **/*.ipr **/*.iws contrib/clustering/example/lib/** contrib/clustering/lib/downloads/** contrib/analysis-extras/lib/** contrib/uima/lib/**" />
+        includes="**/*.sh **/bin/ src/scripts/"
+        filemode="755" />
+    </zip>
+    <solr-checksum file="${package.dir}/${fullnamever}-src.zip"/>
+
+  </target>
+
+
+  <target name="create-package"
+          description="Packages the Solr Binary Distribution"
+          depends="dist, example, javadoc">
+    <mkdir dir="${package.dir}"/>
+
+    <delete includeemptydirs="true">
+      <fileset dir="${example}/work" includes="**/*"/>
+    </delete>
+
+    <delete includeemptydirs="true" failonerror="false">
+      <fileset dir="${dest}/${fullnamever}" includes="**/*"/>
+    </delete>
+
+    <delete file="${package.dir}/${fullnamever}.tgz" failonerror="false" />
+    <delete file="${package.dir}/${fullnamever}.zip" failonerror="false" />
+
+    <tar destfile="${package.dir}/${fullnamever}.tgz" compression="gzip" longfile="gnu">
       <tarfileset dir="."
         prefix="${fullnamever}"
-        includes="src/test-files/solr/lib/classes/empty-file-main-lib.txt" />
+        includes="LICENSE.txt NOTICE.txt *.txt example/** client/** contrib/**/lib/ contrib/**/lucene-libs/ contrib/**/README.txt contrib/**/CHANGES.txt"
+        excludes="${dist}/** ${dest}/** lib/README.committers.txt **/data/ **/logs/* **/classes/ **/*.sh **/bin/ src/scripts/** src/site/build/** **/target/** client/ruby/flare/** client/python/** client/javascript/** contrib/**/build/** **/*.iml **/*.ipr **/*.iws" />
       <tarfileset dir="."
         mode="755"
         prefix="${fullnamever}"
-        includes="**/*.sh **/bin/ src/scripts/" />
+        includes="example/**/*.sh example/**/bin/" />
       <tarfileset dir="."
         prefix="${fullnamever}"
-        includes="dist/**"
-        excludes="*.tgz *.zip *.md5 **/*src*.jar **/*docs*.jar" />
+        includes="dist/*.jar dist/*.war dist/solrj-lib/*"
+        excludes="**/*.tgz **/*.zip **/*.md5 **/*src*.jar **/*docs*.jar" />
+      <!-- hack: the javadocs are built twice since maven needs separate packages... exclude those -->
       <tarfileset dir="${build.docs}"
-        prefix="${fullnamever}/docs/" />
-      <tarfileset dir="../lucene"
-        prefix="lucene"
-        excludes="**/build/" />
-      <tarfileset dir="../modules"
-        prefix="modules"
-        excludes="**/build/" />
+        prefix="${fullnamever}/docs/"
+        excludes="api/solr/** api/contrib-*/**"
+       />
     </tar>
-    <solr-checksum file="${dist}/${fullnamever}.tgz"/>
+    <solr-checksum file="${package.dir}/${fullnamever}.tgz"/>
 
-    <gunzip src="${dist}/${fullnamever}.tgz" dest="${dest}/${fullnamever}.tar"/>
+    <gunzip src="${package.dir}/${fullnamever}.tgz" dest="${dest}/${fullnamever}.tar"/>
     <untar src="${dest}/${fullnamever}.tar" dest="${dest}"/>
 
     <fixcrlf srcdir="${dest}/${fullnamever}"
+       encoding="UTF-8"
        eol="crlf"
        includes="**/*.txt **/*.xml **/*.java **/*.html **/*.csv **/*.css **/*.properties **/*.jsp **/*.xsl **/*.py **/*.rb **/*.js **/*.json **/*.pl"
+       excludes="**/stopwordsWrongEncoding.txt **/gb18030-example.xml"
      />
 
-    <zip destfile="${dist}/${fullnamever}.zip">
+    <zip destfile="${package.dir}/${fullnamever}.zip">
       <zipfileset dir="${dest}/${fullnamever}"
         prefix="${fullnamever}" 
         excludes="**/*.sh **/bin/ src/scripts/" />
@@ -781,17 +907,12 @@
         prefix="${fullnamever}"
         includes="**/*.sh **/bin/ src/scripts/"
         filemode="755" />
-      <zipfileset dir="../lucene"
-        prefix="lucene"
-        excludes="**/build/" />
-      <zipfileset dir="../modules"
-        prefix="modules"
-        excludes="**/build/" />
     </zip>
-    <solr-checksum file="${dist}/${fullnamever}.zip"/>
+    <solr-checksum file="${package.dir}/${fullnamever}.zip"/>
 
   </target>
 
+
   	<target name="build-site" depends="svn-up, init-forrest-entities" description="Prototype Helper for Committers.  Assumes SVN is in the path">
 		<delete dir="src/site/build"/>
 		<exec executable="forrest" dir="src/site"/>
@@ -817,24 +938,30 @@
 		</exec>
 	</target>
 
-  <target name="clean-dist-signatures">
+  <target name="clean-package-signatures">
     <delete failonerror="false">
-      <fileset dir="${dist}">
+      <fileset dir="${package.dir}">
         <include name="**/*.asc"/>
       </fileset>
     </delete>
   </target>
-  <target name="sign-artifacts" depends="clean-dist-signatures">
+  <target name="sign-artifacts" depends="clean-package-signatures">
     <!--<property file="${user.home}/.solr/build.properties" />-->
     <input message="password:>" addproperty="gpg.passphrase">
       <handler classname="org.apache.tools.ant.input.SecureInputHandler" />
     </input>
-    <sign-artifact input.file="${dist}/${fullnamever}.tgz" output.file="${dist}/${fullnamever}.tgz.asc" gpg.passphrase="${gpg.passphrase}"/>
-    <sign-artifact input.file="${dist}/${fullnamever}.zip" output.file="${dist}/${fullnamever}.zip.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${package.dir}/${fullnamever}.tgz" output.file="${package.dir}/${fullnamever}.tgz.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${package.dir}/${fullnamever}.zip" output.file="${package.dir}/${fullnamever}.zip.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${package.dir}/${fullnamever}-src.tgz" output.file="${package.dir}/${fullnamever}-src.tgz.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${package.dir}/${fullnamever}-src.zip" output.file="${package.dir}/${fullnamever}-src.zip.asc" gpg.passphrase="${gpg.passphrase}"/>
 
     <!-- Maven artifacts -->
     <sign-maven-dependency-artifacts artifact.id="solr-commons-csv" gpg.passphrase="${gpg.passphrase}"/>
     <sign-maven-dependency-artifacts artifact.id="solr-noggit" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-maven-dependency-artifacts artifact.id="solr-uima-an-alchemy" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-maven-dependency-artifacts artifact.id="solr-uima-an-calais" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-maven-dependency-artifacts artifact.id="solr-uima-an-tagger" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-maven-dependency-artifacts artifact.id="solr-uima-an-wst" gpg.passphrase="${gpg.passphrase}"/>
     <sign-maven-war-artifacts artifact.id="solr" gpg.passphrase="${gpg.passphrase}"/>
     <sign-maven-artifacts artifact.id="solr-analysis-extras" gpg.passphrase="${gpg.passphrase}"/>
     <sign-maven-artifacts artifact.id="solr-cell" gpg.passphrase="${gpg.passphrase}"/>
@@ -843,22 +970,16 @@
     <sign-maven-artifacts artifact.id="solr-dataimporthandler" gpg.passphrase="${gpg.passphrase}"/>
     <sign-maven-artifacts artifact.id="solr-dataimporthandler-extras" gpg.passphrase="${gpg.passphrase}"/>
     <sign-maven-artifacts artifact.id="solr-solrj" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-maven-artifacts artifact.id="solr-uima" gpg.passphrase="${gpg.passphrase}"/>
 
     <!-- These are special since there are no jars, just poms -->
     <sign-artifact input.file="${maven.dist.prefix}/solr-parent/${version}/solr-parent-${version}.pom" gpg.passphrase="${gpg.passphrase}"/>
   </target>
 
-  <target name="prepare-release" depends="clean, svn-up, build-site, package, sign-artifacts" description="Prototype helper for Committers.  Assumes gpg is in the path">
+  <target name="prepare-release" depends="clean, svn-up, build-site, package, sign-artifacts" 
+          description="Prototype helper for Committers.  Assumes gpg is in the path"/>
 
-    <tar destfile="${dist}/solr-maven.tar" longfile="gnu">
-		  <tarfileset dir="${dist}/maven" prefix="maven"/>
-    </tar>
-	<tar destfile="${dist}/solr.tar" longfile="gnu">
-		<tarfileset dir="${dist}" includes="${fullnamever}.* solr-maven.tar" excludes="${fullnamever}.war"/>
-	</tar>
-  </target>
-
-  <target name="generate-maven-artifacts" depends="maven.ant.tasks-check, create-package">
+  <target name="generate-maven-artifacts" depends="maven.ant.tasks-check,dist,dist-src,dist-javadoc">
     <sequential>
 	  <ant target="get-maven-poms" dir=".."/>
 
@@ -877,14 +998,26 @@
 
       <m2-deploy pom.xml="pom.xml"/>
 
-      <!-- ========== SOLR SPECIFIC COMMONS CSV ========== -->
+      <!-- ========== SOLR SPECIFIC NON-MAVENIZED DEPENDENCIES ========== -->
       <m2-deploy-with-pom-template pom.xml="lib/solr-commons-csv-pom.xml.template"
                                    jar.file="lib/commons-csv-1.0-SNAPSHOT-r966014.jar" />
 
-      <!-- ========== SOLR ARTIFACTS ========== -->
-
       <m2-deploy-with-pom-template pom.xml="lib/apache-solr-noggit-pom.xml.template"
                                    jar.file="lib/apache-solr-noggit-r944541.jar" />
+      
+      <m2-deploy-with-pom-template pom.xml="contrib/uima/lib/solr-uima-an-alchemy-pom.xml.template"
+                                   jar.file="contrib/uima/lib/uima-an-alchemy-2.3.1-SNAPSHOT-r1062868.jar" />
+
+      <m2-deploy-with-pom-template pom.xml="contrib/uima/lib/solr-uima-an-calais-pom.xml.template"
+                                   jar.file="contrib/uima/lib/uima-an-calais-2.3.1-SNAPSHOT-r1062868.jar" />
+
+      <m2-deploy-with-pom-template pom.xml="contrib/uima/lib/solr-uima-an-tagger-pom.xml.template"
+                                   jar.file="contrib/uima/lib/uima-an-tagger-2.3.1-SNAPSHOT-r1062868.jar" />
+
+      <m2-deploy-with-pom-template pom.xml="contrib/uima/lib/solr-uima-an-wst-pom.xml.template"
+                                   jar.file="contrib/uima/lib/uima-an-wst-2.3.1-SNAPSHOT-r1076132.jar" />
+
+      <!-- ========== SOLR ARTIFACTS ========== -->
 
       <m2-deploy pom.xml="contrib/dataimporthandler/src/pom.xml"
                  jar.file="${dist}/apache-solr-dataimporthandler-${version}.jar">

Modified: lucene/dev/branches/bulkpostings/solr/client/ruby/solr-ruby/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/client/ruby/solr-ruby/solr/conf/solrconfig.xml?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/client/ruby/solr-ruby/solr/conf/solrconfig.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/client/ruby/solr-ruby/solr/conf/solrconfig.xml Tue Mar 15 21:35:17 2011
@@ -17,19 +17,11 @@
 -->
 
 <config>
-  <!-- Set this to 'false' if you want solr to continue working after it has 
-       encountered an severe configuration error.  In a production environment, 
-       you may want solr to keep working even if one handler is mis-configured.
-
-       You may also set this to false using by setting the system property:
-         -Dsolr.abortOnConfigurationError=false
-     -->
-  <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
 
   <!-- Used to specify an alternate directory to hold all index data
        other than the default ./data under the Solr home.
        If replication is in use, this should match the replication configuration. -->
-  <dataDir>${solr.data.dir:./solr/data}</dataDir>
+  <dataDir>${solr.data.dir:}</dataDir>
 
   <indexDefaults>
    <!-- Values here affect all index writers and act as a default unless overridden. -->

Modified: lucene/dev/branches/bulkpostings/solr/client/ruby/solr-ruby/test/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/client/ruby/solr-ruby/test/conf/solrconfig.xml?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/client/ruby/solr-ruby/test/conf/solrconfig.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/client/ruby/solr-ruby/test/conf/solrconfig.xml Tue Mar 15 21:35:17 2011
@@ -17,20 +17,12 @@
 -->
 
 <config>
-  <!-- Set this to 'false' if you want solr to continue working after it has 
-       encountered an severe configuration error.  In a production environment, 
-       you may want solr to keep working even if one handler is mis-configured.
-
-       You may also set this to false using by setting the system property:
-         -Dsolr.abortOnConfigurationError=false
-     -->
-  <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
 
   <!-- Used to specify an alternate directory to hold all index data
        other than the default ./data under the Solr home.
        If replication is in use, this should match the replication configuration. -->
   <!--
-  <dataDir>./solr/data</dataDir>
+  <dataDir>${solr.data.dir:}</dataDir>
   -->
 
   <indexDefaults>

Modified: lucene/dev/branches/bulkpostings/solr/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/common-build.xml?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/common-build.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/common-build.xml Tue Mar 15 21:35:17 2011
@@ -35,6 +35,7 @@
 
   <property name="dev-tools.dir" value="${solr-path}/../dev-tools"/>
   <property name="prettify.dir" value="${dev-tools.dir}/prettify"/>
+  <property name="package.dir" location="package"/>
 
   <tstamp>
     <format property="year" pattern="yyyy"/>
@@ -150,7 +151,7 @@
 
   <!-- Maven properties -->
   <property name="maven.build.dir" value="${basedir}/build/maven"/>
-  <property name="maven.dist.dir" value="${basedir}/dist/maven"/>
+  <property name="maven.dist.dir" value="${package.dir}/maven"/>
 
   <property name="maven.dist.prefix" value="${maven.dist.dir}/org/apache/solr"/>
 
@@ -306,25 +307,10 @@
    -->
   <macrodef name="solr-checksum">
     <attribute name="file"/>
-    <!-- NOTE: we use the value of @{file} in the names any properties
-         set because macro's don't have variables, and otherwise we
-         wouldn't be able to checksum more then one file per build
-      -->
-    <sequential>
-     <checksum file="@{file}"
-               property="@{file}.sum"
-               algorithm="${checksum.algorithm}" />
-     <basename file="@{file}" property="@{file}.base" />
-     <concat destfile="@{file}.${checksum.algorithm}"
-             force="yes"
-             append="false"
-             fixlastline="yes">
-       <header trimleading="yes">${@{file}.sum}  </header>
-       <!-- empty fileset to trick concat -->
-       <fileset dir="." excludes="**" />
-       <footer trimleading="yes">${@{file}.base}
-       </footer>
-     </concat>
+    <sequential>
+      <echo>Building checksums for '@{file}'</echo>
+      <checksum file="@{file}" algorithm="md5" format="MD5SUM" forceoverwrite="yes" readbuffersize="65536"/>
+      <checksum file="@{file}" algorithm="sha1" format="MD5SUM" forceoverwrite="yes" readbuffersize="65536"/>
     </sequential>
   </macrodef>
 
@@ -347,7 +333,8 @@
     <attribute name="destdir"/>
   	<attribute name="title" default="${Name} ${version} API (${specversion})"/>
     <sequential>
-      <copy todir="@{destdir}/../prettify" overwrite="false">
+      <mkdir dir="@{destdir}"/>
+      <copy todir="@{destdir}/prettify" overwrite="false">
         <fileset dir="${prettify.dir}"/>
       </copy>
       <javadoc
@@ -363,7 +350,7 @@
           link="${javadoc.link.java}"
           windowtitle="${Name} ${version} API"
           doctitle="@{title}"
-          stylesheetfile="@{destdir}/../prettify/stylesheet+prettify.css"
+          stylesheetfile="@{destdir}/prettify/stylesheet+prettify.css"
           bottom="Copyright &amp;copy; ${year} Apache Software Foundation.  All Rights Reserved.">
         <tag name="todo" description="To Do:"/>
         <tag name="uml.property" description="UML Property:"/>
@@ -376,7 +363,7 @@
         <link href="${javadoc.link.junit}"/>
         <link href="${javadoc.link.lucene}"/>
       	<header><![CDATA[
-      		 <script src="{@docRoot}/../prettify/prettify.js" type="text/javascript"></script>
+      		 <script src="{@docRoot}/prettify/prettify.js" type="text/javascript"></script>
       		 <script language="JavaScript">window.onload=function(){windowTitle();prettyPrint();}</script>
       	]]></header>
 
@@ -454,34 +441,32 @@
   <macrodef name="sign-maven-artifacts" description="Signs maven artifacts">
     <attribute name="artifact.id"/>
     <attribute name="prefix.dir" default="${maven.dist.prefix}"/>
-    <attribute name="maven.version" default="${version}"/>
     <attribute name="gpg.passphrase"/>
     <sequential>
-      <sign-artifact input.file="@{prefix.dir}/@{artifact.id}/@{maven.version}/@{artifact.id}-@{maven.version}.jar" gpg.passphrase="@{gpg.passphrase}"/>
-      <sign-artifact input.file="@{prefix.dir}/@{artifact.id}/@{maven.version}/@{artifact.id}-@{maven.version}-javadoc.jar" gpg.passphrase="@{gpg.passphrase}"/>
-      <sign-artifact input.file="@{prefix.dir}/@{artifact.id}/@{maven.version}/@{artifact.id}-@{maven.version}-sources.jar" gpg.passphrase="@{gpg.passphrase}"/>
-      <sign-artifact input.file="@{prefix.dir}/@{artifact.id}/@{maven.version}/@{artifact.id}-@{maven.version}.pom" gpg.passphrase="@{gpg.passphrase}"/>
+      <sign-artifact input.file="@{prefix.dir}/@{artifact.id}/${version}/@{artifact.id}-${version}.jar" gpg.passphrase="@{gpg.passphrase}"/>
+      <sign-artifact input.file="@{prefix.dir}/@{artifact.id}/${version}/@{artifact.id}-${version}-javadoc.jar" gpg.passphrase="@{gpg.passphrase}"/>
+      <sign-artifact input.file="@{prefix.dir}/@{artifact.id}/${version}/@{artifact.id}-${version}-sources.jar" gpg.passphrase="@{gpg.passphrase}"/>
+      <sign-artifact input.file="@{prefix.dir}/@{artifact.id}/${version}/@{artifact.id}-${version}.pom" gpg.passphrase="@{gpg.passphrase}"/>
     </sequential>
   </macrodef>
 
   <macrodef name="sign-maven-war-artifacts" description="Signs maven artifacts">
     <attribute name="artifact.id"/>
     <attribute name="prefix.dir" default="${maven.dist.prefix}"/>
-    <attribute name="maven.version" default="${version}"/>
     <attribute name="gpg.passphrase"/>
     <sequential>
-      <sign-artifact input.file="@{prefix.dir}/@{artifact.id}/@{maven.version}/@{artifact.id}-@{maven.version}.war" gpg.passphrase="@{gpg.passphrase}"/>
-      <sign-artifact input.file="@{prefix.dir}/@{artifact.id}/@{maven.version}/@{artifact.id}-@{maven.version}.pom" gpg.passphrase="@{gpg.passphrase}"/>
+      <sign-artifact input.file="@{prefix.dir}/@{artifact.id}/${version}/@{artifact.id}-${version}.war" gpg.passphrase="@{gpg.passphrase}"/>
+      <sign-artifact input.file="@{prefix.dir}/@{artifact.id}/${version}/@{artifact.id}-${version}.pom" gpg.passphrase="@{gpg.passphrase}"/>
     </sequential>
   </macrodef>
 
-  <macrodef name="sign-maven-dependency-artifacts" description="Signs maven artifacts">
+  <macrodef name="sign-maven-dependency-artifacts" description="Signs a maven artifact and its POM">
     <attribute name="artifact.id"/>
     <attribute name="prefix.dir" default="${maven.dist.prefix}"/>
-    <attribute name="maven.version" default="${version}"/>
     <attribute name="gpg.passphrase"/>
     <sequential>
-      <sign-artifact input.file="@{prefix.dir}/@{artifact.id}/@{maven.version}/@{artifact.id}-@{maven.version}.jar"  gpg.passphrase="@{gpg.passphrase}"/>
+      <sign-artifact input.file="@{prefix.dir}/@{artifact.id}/${version}/@{artifact.id}-${version}.jar"  gpg.passphrase="@{gpg.passphrase}"/>
+      <sign-artifact input.file="@{prefix.dir}/@{artifact.id}/${version}/@{artifact.id}-${version}.pom"  gpg.passphrase="@{gpg.passphrase}"/>
     </sequential>
   </macrodef>
 

Modified: lucene/dev/branches/bulkpostings/solr/contrib/analysis-extras/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/analysis-extras/CHANGES.txt?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/analysis-extras/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/analysis-extras/CHANGES.txt Tue Mar 15 21:35:17 2011
@@ -15,6 +15,10 @@ $Id$
 
 (No Changes)
 
+================== Release 3.2-dev ==================
+
+(No Changes)
+
 ================== Release 3.1-dev ==================
 
 * SOLR-2210: Add icu-based tokenizer and filters to contrib/analysis-extras (rmuir)

Modified: lucene/dev/branches/bulkpostings/solr/contrib/clustering/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/clustering/CHANGES.txt?rev=1081952&r1=1081951&r2=1081952&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/clustering/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/clustering/CHANGES.txt Tue Mar 15 21:35:17 2011
@@ -11,6 +11,10 @@ $Id$
 
 (No Changes)
 
+================== Release 3.2.0-dev ==================
+
+(No Changes)
+
 ================== Release 3.1.0-dev ==================
 
 * SOLR-1684: Switch to use the SolrIndexSearcher.doc(int, Set<String>) method b/c it can use the document cache (gsingers)