Posted to commits@lucene.apache.org by ro...@apache.org on 2012/11/23 13:01:26 UTC

svn commit: r1412849 [9/13] - in /lucene/dev/branches/LUCENE-2878: ./ dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/ dev-tools/idea/solr/contrib/dataimporthandler/ dev-tools/maven/ dev-tools/maven/solr/contrib/dataimporthandler/ dev-tool...

Modified: lucene/dev/branches/LUCENE-2878/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java Fri Nov 23 12:00:32 2012
@@ -19,10 +19,10 @@ package org.apache.lucene.search.suggest
 
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.InputStream;
 import java.io.FileOutputStream;
-import java.io.OutputStream;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.io.Reader;
 import java.io.StringReader;
 import java.util.ArrayList;
@@ -39,6 +39,7 @@ import org.apache.lucene.analysis.Canned
 import org.apache.lucene.analysis.CannedBinaryTokenStream;
 import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockBytesAttributeFactory;
 import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Token;
@@ -133,6 +134,15 @@ public class AnalyzingSuggesterTest exte
     assertEquals(50, results.get(0).value, 0.01F);
   }
 
+  public void testEmpty() throws Exception {
+    Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false);
+    AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
+    suggester.build(new TermFreqArrayIterator(new TermFreq[0]));
+
+    List<LookupResult> result = suggester.lookup("a", false, 20);
+    assertTrue(result.isEmpty());
+  }
+
   public void testNoSeps() throws Exception {
     TermFreq[] keys = new TermFreq[] {
       new TermFreq("ab cd", 0),
@@ -446,6 +456,11 @@ public class AnalyzingSuggesterTest exte
         return 0;
       }
     }
+
+    @Override
+    public String toString() {
+      return surfaceForm + "/" + weight;
+    }
   }
 
   static boolean isStopChar(char ch, int numStopChars) {
@@ -503,6 +518,8 @@ public class AnalyzingSuggesterTest exte
     private int numStopChars;
     private boolean preserveHoles;
 
+    private final MockBytesAttributeFactory factory = new MockBytesAttributeFactory();
+
     public MockTokenEatingAnalyzer(int numStopChars, boolean preserveHoles) {
       this.preserveHoles = preserveHoles;
       this.numStopChars = numStopChars;
@@ -510,7 +527,7 @@ public class AnalyzingSuggesterTest exte
 
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-      MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
+      MockTokenizer tokenizer = new MockTokenizer(factory, reader, MockTokenizer.WHITESPACE, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
       tokenizer.setEnableChecks(true);
       TokenStream next;
       if (numStopChars != 0) {
@@ -522,6 +539,8 @@ public class AnalyzingSuggesterTest exte
     }
   }
 
+  private static char SEP = '\uFFFF';
+
   public void testRandom() throws Exception {
 
     int numQueries = atLeast(1000);
@@ -558,13 +577,13 @@ public class AnalyzingSuggesterTest exte
               if (token > 0) {
                 key += " ";
               }
-              if (preserveSep && analyzedKey.length() > 0 && analyzedKey.charAt(analyzedKey.length()-1) != ' ') {
-                analyzedKey += " ";
+              if (preserveSep && analyzedKey.length() > 0 && analyzedKey.charAt(analyzedKey.length()-1) != SEP) {
+                analyzedKey += SEP;
               }
               key += s;
               if (s.length() == 1 && isStopChar(s.charAt(0), numStopChars)) {
                 if (preserveSep && preserveHoles) {
-                  analyzedKey += '\u0000';
+                  analyzedKey += SEP;
                 }
               } else {
                 analyzedKey += s;
@@ -574,7 +593,7 @@ public class AnalyzingSuggesterTest exte
           }
         }
 
-        analyzedKey = analyzedKey.replaceAll("(^| )\u0000$", "");
+        analyzedKey = analyzedKey.replaceAll("(^|" + SEP + ")" + SEP + "$", "");
 
         // Don't add same surface form more than once:
         if (!seen.contains(key)) {
@@ -599,7 +618,7 @@ public class AnalyzingSuggesterTest exte
       List<TermFreq2> sorted = new ArrayList<TermFreq2>(slowCompletor);
       Collections.sort(sorted);
       for(TermFreq2 ent : sorted) {
-        System.out.println("  surface='" + ent.surfaceForm + " analyzed='" + ent.analyzedForm + "' weight=" + ent.weight);
+        System.out.println("  surface='" + ent.surfaceForm + "' analyzed='" + ent.analyzedForm + "' weight=" + ent.weight);
       }
     }
 
@@ -618,20 +637,20 @@ public class AnalyzingSuggesterTest exte
       List<LookupResult> r = suggester.lookup(_TestUtil.stringToCharSequence(prefix, random()), false, topN);
 
       // 2. go thru whole set to find suggestions:
-      List<LookupResult> matches = new ArrayList<LookupResult>();
+      List<TermFreq2> matches = new ArrayList<TermFreq2>();
 
       // "Analyze" the key:
       String[] tokens = prefix.split(" ");
       StringBuilder builder = new StringBuilder();
       for(int i=0;i<tokens.length;i++) {
         String token = tokens[i];
-        if (preserveSep && builder.length() > 0 && !builder.toString().endsWith(" ")) {
-          builder.append(' ');
+        if (preserveSep && builder.length() > 0 && !builder.toString().endsWith(""+SEP)) {
+          builder.append(SEP);
         }
 
         if (token.length() == 1 && isStopChar(token.charAt(0), numStopChars)) {
           if (preserveSep && preserveHoles) {
-            builder.append("\u0000");
+            builder.append(SEP);
           }
         } else {
           builder.append(token);
@@ -644,8 +663,7 @@ public class AnalyzingSuggesterTest exte
       // not tell us any trailing holes, yet ... there is an
       // issue open for this):
       while (true) {
-        String s = analyzedKey.replaceAll("(^| )\u0000$", "");
-        s = s.replaceAll("\\s+$", "");
+        String s = analyzedKey.replaceAll(SEP + "$", "");
         if (s.equals(analyzedKey)) {
           break;
         }
@@ -665,18 +683,18 @@ public class AnalyzingSuggesterTest exte
       // TODO: could be faster... but it's slowCompletor for a reason
       for (TermFreq2 e : slowCompletor) {
         if (e.analyzedForm.startsWith(analyzedKey)) {
-          matches.add(new LookupResult(e.surfaceForm, e.weight));
+          matches.add(e);
         }
       }
 
       assertTrue(numStopChars > 0 || matches.size() > 0);
 
       if (matches.size() > 1) {
-        Collections.sort(matches, new Comparator<LookupResult>() {
-            public int compare(LookupResult left, LookupResult right) {
-              int cmp = Float.compare(right.value, left.value);
+        Collections.sort(matches, new Comparator<TermFreq2>() {
+            public int compare(TermFreq2 left, TermFreq2 right) {
+              int cmp = Float.compare(right.weight, left.weight);
               if (cmp == 0) {
-                return left.compareTo(right);
+                return left.analyzedForm.compareTo(right.analyzedForm);
               } else {
                 return cmp;
               }
@@ -690,8 +708,8 @@ public class AnalyzingSuggesterTest exte
 
       if (VERBOSE) {
         System.out.println("  expected:");
-        for(LookupResult lr : matches) {
-          System.out.println("    key=" + lr.key + " weight=" + lr.value);
+        for(TermFreq2 lr : matches) {
+          System.out.println("    key=" + lr.surfaceForm + " weight=" + lr.weight);
         }
 
         System.out.println("  actual:");
@@ -704,8 +722,8 @@ public class AnalyzingSuggesterTest exte
 
       for(int hit=0;hit<r.size();hit++) {
         //System.out.println("  check hit " + hit);
-        assertEquals(matches.get(hit).key.toString(), r.get(hit).key.toString());
-        assertEquals(matches.get(hit).value, r.get(hit).value, 0f);
+        assertEquals(matches.get(hit).surfaceForm.toString(), r.get(hit).key.toString());
+        assertEquals(matches.get(hit).weight, r.get(hit).value, 0f);
       }
     }
   }
@@ -806,7 +824,7 @@ public class AnalyzingSuggesterTest exte
           new TermFreq("a c b", 1),
         }));
 
-    List<LookupResult> results = suggester.lookup("a", false, 4);
+    suggester.lookup("a", false, 4);
   }
 
   public void testExactFirstMissingResult() throws Exception {
@@ -983,4 +1001,62 @@ public class AnalyzingSuggesterTest exte
     assertEquals("b", results.get(1).key);
     assertEquals(5, results.get(1).value);
   }
+
+  public void test0ByteKeys() throws Exception {
+    final Analyzer a = new Analyzer() {
+        @Override
+        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+        
+          return new TokenStreamComponents(tokenizer) {
+            int tokenStreamCounter = 0;
+            final TokenStream[] tokenStreams = new TokenStream[] {
+              new CannedBinaryTokenStream(new BinaryToken[] {
+                  token(new BytesRef(new byte[] {0x0, 0x0, 0x0})),
+                }),
+              new CannedBinaryTokenStream(new BinaryToken[] {
+                  token(new BytesRef(new byte[] {0x0, 0x0})),
+                }),
+              new CannedBinaryTokenStream(new BinaryToken[] {
+                  token(new BytesRef(new byte[] {0x0, 0x0, 0x0})),
+                }),
+              new CannedBinaryTokenStream(new BinaryToken[] {
+                  token(new BytesRef(new byte[] {0x0, 0x0})),
+                }),
+            };
+
+            @Override
+            public TokenStream getTokenStream() {
+              TokenStream result = tokenStreams[tokenStreamCounter];
+              tokenStreamCounter++;
+              return result;
+            }
+         
+            @Override
+            protected void setReader(final Reader reader) throws IOException {
+            }
+          };
+        }
+      };
+
+    AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
+
+    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
+          new TermFreq("a a", 50),
+          new TermFreq("a b", 50),
+        }));
+  }
+
+  public void testDupSurfaceFormsMissingResults3() throws Exception {
+    Analyzer a = new MockAnalyzer(random());
+    AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1);
+    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
+          new TermFreq("a a", 7),
+          new TermFreq("a a", 7),
+          new TermFreq("a c", 6),
+          new TermFreq("a c", 3),
+          new TermFreq("a b", 5),
+        }));
+    assertEquals("[a a/7, a c/6, a b/5]", suggester.lookup("a", false, 3).toString());
+  }
 }

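As background for the tests above: AnalyzingSuggester follows a simple
build-then-lookup pattern. A minimal sketch (not from this commit), assuming
only the suggest-module classes already used in this test:

    Analyzer a = new MockAnalyzer(random());
    AnalyzingSuggester suggester = new AnalyzingSuggester(a);
    // Build the suggester from weighted surface forms.
    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
          new TermFreq("foo bar", 10),
          new TermFreq("foo baz", 5),
        }));
    // Top-2 completions for the prefix "foo", ordered by descending weight.
    List<LookupResult> results = suggester.lookup("foo", false, 2);
    assertEquals("foo bar", results.get(0).key.toString());
    assertEquals(10, results.get(0).value);
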
Modified: lucene/dev/branches/LUCENE-2878/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java Fri Nov 23 12:00:32 2012
@@ -22,12 +22,13 @@ import java.util.*;
 import org.apache.lucene.search.suggest.Lookup.LookupResult;
 import org.apache.lucene.search.suggest.TermFreq;
 import org.apache.lucene.search.suggest.TermFreqArrayIterator;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
 
 public class WFSTCompletionTest extends LuceneTestCase {
   
-  public void test() throws Exception {
+  public void testBasic() throws Exception {
     TermFreq keys[] = new TermFreq[] {
         new TermFreq("foo", 50),
         new TermFreq("bar", 10),
@@ -194,4 +195,26 @@ public class WFSTCompletionTest extends 
       }
     }
   }
+
+  public void test0ByteKeys() throws Exception {
+    BytesRef key1 = new BytesRef(4);
+    key1.length = 4;
+    BytesRef key2 = new BytesRef(3);
+    key2.length = 3;
+
+    WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
+
+    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
+          new TermFreq(key1, 50),
+          new TermFreq(key2, 50),
+        }));
+  }
+
+  public void testEmpty() throws Exception {
+    WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
+
+    suggester.build(new TermFreqArrayIterator(new TermFreq[0]));
+    List<LookupResult> result = suggester.lookup("a", false, 20);
+    assertTrue(result.isEmpty());
+  }
 }

Modified: lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java Fri Nov 23 12:00:32 2012
@@ -24,37 +24,40 @@ import org.apache.lucene.codecs.StoredFi
 import org.apache.lucene.codecs.lucene41.Lucene41Codec;
 
 import com.carrotsearch.randomizedtesting.generators.RandomInts;
-import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 
 /**
  * A codec that uses {@link CompressingStoredFieldsFormat} for its stored
  * fields and delegates to {@link Lucene41Codec} for everything else.
  */
-public class CompressingCodec extends FilterCodec {
+public abstract class CompressingCodec extends FilterCodec {
 
   /**
    * Create a random instance.
    */
+  public static CompressingCodec randomInstance(Random random, int chunkSize) {
+    switch (random.nextInt(4)) {
+    case 0:
+      return new FastCompressingCodec(chunkSize);
+    case 1:
+      return new FastDecompressionCompressingCodec(chunkSize);
+    case 2:
+      return new HighCompressionCompressingCodec(chunkSize);
+    case 3:
+      return new DummyCompressingCodec(chunkSize);
+    default:
+      throw new AssertionError();
+    }
+  }
+
   public static CompressingCodec randomInstance(Random random) {
-    final CompressionMode mode = RandomPicks.randomFrom(random, CompressionMode.values());
-    final int chunkSize = RandomInts.randomIntBetween(random, 1, 500);
-    final CompressingStoredFieldsIndex index = RandomPicks.randomFrom(random, CompressingStoredFieldsIndex.values());
-    return new CompressingCodec(mode, chunkSize, index);
+    return randomInstance(random, RandomInts.randomIntBetween(random, 1, 500));
   }
 
   private final CompressingStoredFieldsFormat storedFieldsFormat;
 
-  /**
-   * @see CompressingStoredFieldsFormat#CompressingStoredFieldsFormat(CompressionMode, int, CompressingStoredFieldsIndex)
-   */
-  public CompressingCodec(CompressionMode compressionMode, int chunkSize,
-      CompressingStoredFieldsIndex storedFieldsIndexFormat) {
-    super("Compressing", new Lucene41Codec());
-    this.storedFieldsFormat = new CompressingStoredFieldsFormat(compressionMode, chunkSize, storedFieldsIndexFormat);
-  }
-
-  public CompressingCodec() {
-    this(CompressionMode.FAST, 1 << 14, CompressingStoredFieldsIndex.MEMORY_CHUNK);
+  public CompressingCodec(String name, CompressionMode compressionMode, int chunkSize) {
+    super(name, new Lucene41Codec());
+    this.storedFieldsFormat = new CompressingStoredFieldsFormat(name, compressionMode, chunkSize);
   }
 
   @Override

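As background: this refactor makes CompressingCodec an abstract base class
whose four concrete subclasses are chosen by randomInstance(Random[, chunkSize])
above. A minimal sketch of how a test might plug one in, assuming the standard
LuceneTestCase helpers (random(), TEST_VERSION_CURRENT):

    // Pick one of the four concrete compressing codecs at random and
    // use it for a test index.
    IndexWriterConfig iwc =
        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwc.setCodec(CompressingCodec.randomInstance(random()));
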
Modified: lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java Fri Nov 23 12:00:32 2012
@@ -146,7 +146,7 @@ public final class FieldFilterAtomicRead
 
     @Override
     public Iterator<String> iterator() {
-      return new FilterIterator<String>(super.iterator()) {
+      return new FilterIterator<String, String>(super.iterator()) {
         @Override
         protected boolean predicateFunction(String field) {
           return hasField(field);

Modified: lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java Fri Nov 23 12:00:32 2012
@@ -96,8 +96,6 @@ public class MockDirectoryWrapper extend
   // is made to delete an open file, we enroll it here.
   private Set<String> openFilesDeleted;
 
-  final RateLimiter rateLimiter;
-
   private synchronized void init() {
     if (openFiles == null) {
       openFiles = new HashMap<String,Integer>();
@@ -120,19 +118,6 @@ public class MockDirectoryWrapper extend
         .mBitsToBytes(40 + randomState.nextInt(10)), 5 + randomState.nextInt(5), null);
     // force wrapping of lockfactory
     this.lockFactory = new MockLockFactoryWrapper(this, delegate.getLockFactory());
-
-    // 2% of the time use rate limiter
-    if (randomState.nextInt(50) == 17) {
-      // Use RateLimiter
-      double maxMBPerSec = 10 + 5*(randomState.nextDouble()-0.5);
-      if (LuceneTestCase.VERBOSE) {
-        System.out.println("MockDirectoryWrapper: will rate limit output IO to " + maxMBPerSec + " MB/sec");
-      }
-      rateLimiter = new RateLimiter(maxMBPerSec);
-    } else {
-      rateLimiter = null;
-    }
-
     init();
   }
 
@@ -177,7 +162,9 @@ public class MockDirectoryWrapper extend
       throw new IOException("cannot sync after crash");
     }
     unSyncedFiles.removeAll(names);
-    if (LuceneTestCase.rarely(randomState) || delegate instanceof NRTCachingDirectory) {
+    // TODO: need to improve hack to be OK w/
+    // RateLimitingDirWrapper in between...
+    if (true || LuceneTestCase.rarely(randomState) || delegate instanceof NRTCachingDirectory) {
       // don't wear out our hardware so much in tests.
       delegate.sync(names);
     }
@@ -447,7 +434,6 @@ public class MockDirectoryWrapper extend
         ramdir.fileMap.put(name, file);
       }
     }
-    
     //System.out.println(Thread.currentThread().getName() + ": MDW: create " + name);
     IndexOutput io = new MockIndexOutputWrapper(this, delegate.createOutput(name, LuceneTestCase.newIOContext(randomState, context)), name);
     addFileHandle(io, name, Handle.Output);
@@ -455,7 +441,7 @@ public class MockDirectoryWrapper extend
     
     // throttling REALLY slows down tests, so don't do it very often for SOMETIMES.
     if (throttling == Throttling.ALWAYS || 
-        (throttling == Throttling.SOMETIMES && rateLimiter == null && randomState.nextInt(50) == 0)) {
+        (throttling == Throttling.SOMETIMES && randomState.nextInt(50) == 0) && !(delegate instanceof RateLimitedDirectoryWrapper)) {
       if (LuceneTestCase.VERBOSE) {
         System.out.println("MockDirectoryWrapper: throttling indexOutput");
       }

Modified: lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java Fri Nov 23 12:00:32 2012
@@ -77,11 +77,6 @@ public class MockIndexOutputWrapper exte
   public void writeBytes(byte[] b, int offset, int len) throws IOException {
     long freeSpace = dir.maxSize == 0 ? 0 : dir.maxSize - dir.sizeInBytes();
     long realUsage = 0;
-
-    if (dir.rateLimiter != null && len >= 1000) {
-      dir.rateLimiter.pause(len);
-    }
-
     // If MockRAMDir crashed since we were opened, then
     // don't write anything:
     if (dir.crashed)

Modified: lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java Fri Nov 23 12:00:32 2012
@@ -37,6 +37,7 @@ import org.apache.lucene.search.*;
 import org.apache.lucene.search.FieldCache.CacheEntry;
 import org.apache.lucene.search.QueryUtils.FCInvisibleMultiReader;
 import org.apache.lucene.store.*;
+import org.apache.lucene.store.IOContext.Context;
 import org.apache.lucene.store.MockDirectoryWrapper.Throttling;
 import org.apache.lucene.util.FieldCacheSanityChecker.Insanity;
 import org.junit.*;
@@ -947,6 +948,27 @@ public abstract class LuceneTestCase ext
     if (rarely(random)) {
       directory = new NRTCachingDirectory(directory, random.nextDouble(), random.nextDouble());
     }
+    
+    if (rarely(random)) { 
+      final double maxMBPerSec = 10 + 5*(random.nextDouble()-0.5);
+      if (LuceneTestCase.VERBOSE) {
+        System.out.println("LuceneTestCase: will rate limit output IndexOutput to " + maxMBPerSec + " MB/sec");
+      }
+      final RateLimitedDirectoryWrapper rateLimitedDirectoryWrapper = new RateLimitedDirectoryWrapper(directory);
+      switch (random.nextInt(10)) {
+        case 3: // sometimes rate limit on flush
+          rateLimitedDirectoryWrapper.setMaxWriteMBPerSec(maxMBPerSec, Context.FLUSH);
+          break;
+        case 2: // sometimes rate limit flush & merge
+          rateLimitedDirectoryWrapper.setMaxWriteMBPerSec(maxMBPerSec, Context.FLUSH);
+          rateLimitedDirectoryWrapper.setMaxWriteMBPerSec(maxMBPerSec, Context.MERGE);
+          break;
+        default:
+          rateLimitedDirectoryWrapper.setMaxWriteMBPerSec(maxMBPerSec, Context.MERGE);
+      }
+      directory =  rateLimitedDirectoryWrapper;
+      
+    }
 
     if (bare) {
       BaseDirectoryWrapper base = new BaseDirectoryWrapper(directory);
@@ -954,6 +976,7 @@ public abstract class LuceneTestCase ext
       return base;
     } else {
       MockDirectoryWrapper mock = new MockDirectoryWrapper(random, directory);
+      
       mock.setThrottling(TEST_THROTTLING);
       closeAfterSuite(new CloseableDirectory(mock, suiteFailureMarker));
       return mock;

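As background: the new block above wraps the test Directory in a
RateLimitedDirectoryWrapper and caps write throughput per IOContext.Context
(FLUSH, MERGE, or both). A minimal sketch of direct use, relying only on the
constructor and setter exercised in this diff (the path is illustrative):

    // Cap merge writes at ~5 MB/sec; flushes and reads are unaffected.
    Directory dir = FSDirectory.open(new File("/path/to/index"));
    RateLimitedDirectoryWrapper limited = new RateLimitedDirectoryWrapper(dir);
    limited.setMaxWriteMBPerSec(5.0, IOContext.Context.MERGE);
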
Modified: lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec Fri Nov 23 12:00:32 2012
@@ -14,4 +14,7 @@
 #  limitations under the License.
 
 org.apache.lucene.codecs.asserting.AssertingCodec
-org.apache.lucene.codecs.compressing.CompressingCodec
+org.apache.lucene.codecs.compressing.FastCompressingCodec
+org.apache.lucene.codecs.compressing.FastDecompressionCompressingCodec
+org.apache.lucene.codecs.compressing.HighCompressionCompressingCodec
+org.apache.lucene.codecs.compressing.DummyCompressingCodec

Modified: lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/commons-io.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/commons-io.txt?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/commons-io.txt (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/commons-io.txt Fri Nov 23 12:00:32 2012
@@ -1,3 +1,18 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
 # These methods and classes from commons-io should not be used by Solr classes (unsafe, no charset,...):
 
 org.apache.commons.io.IOUtils#copy(java.io.InputStream,java.io.Writer)

Modified: lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/executors.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/executors.txt?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/executors.txt (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/executors.txt Fri Nov 23 12:00:32 2012
@@ -1,3 +1,18 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
 # These methods spawn threads with vague names. Use a custom thread factory and name
 # threads so that you can tell (by its name) which executor it is associated with.
 # see Solr's DefaultSolrThreadFactory

Modified: lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/jdk-deprecated.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/jdk-deprecated.txt?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/jdk-deprecated.txt (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/jdk-deprecated.txt Fri Nov 23 12:00:32 2012
@@ -1,3 +1,18 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
 # these are all deprecated in jdk 6: make them fail the build (java[x].* only)
 # http://docs.oracle.com/javase/6/docs/api/deprecated-list.html
 # TODO: would be great to find a simple way to autogen this

Modified: lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/jdk.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/jdk.txt?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/jdk.txt (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/jdk.txt Fri Nov 23 12:00:32 2012
@@ -1,3 +1,18 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
 # These methods and classes should not be used by Lucene classes (unsafe, no charset,...):
 
 java.lang.String#<init>(byte[])

Modified: lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/system-out.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/system-out.txt?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/system-out.txt (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/system-out.txt Fri Nov 23 12:00:32 2012
@@ -1,3 +1,18 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
 # Don't allow System.out/System.err usage in non-test Lucene code
 java.lang.System#out
 java.lang.System#err

Modified: lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/tests.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/tests.txt?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/tests.txt (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/tools/forbiddenApis/tests.txt Fri Nov 23 12:00:32 2012
@@ -1,3 +1,18 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
 # All classes should derive from LuceneTestCase
 junit.framework.TestCase
 

Modified: lucene/dev/branches/LUCENE-2878/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/solr/CHANGES.txt?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/solr/CHANGES.txt (original)
+++ lucene/dev/branches/LUCENE-2878/solr/CHANGES.txt Fri Nov 23 12:00:32 2012
@@ -26,6 +26,14 @@ $Id$
 
 ==================  4.1.0 ==================
 
+Versions of Major Components
+---------------------
+Apache Tika 1.2
+Carrot2 3.5.0
+Velocity 1.6.4 and Velocity Tools 2.0
+Apache UIMA 2.3.1
+Apache ZooKeeper 3.4.5
+
 Detailed Change List
 ----------------------
 
@@ -55,6 +63,31 @@ New Features
 * SOLR-3911: Make Directory and DirectoryFactory first class so that the majority 
   of Solr's features work with any custom implementations. (Mark Miller)
 
+* SOLR-1972: Add extra statistics to RequestHandlers - 5 & 15-minute reqs/sec
+  rolling averages; median, 75th, 95th, 99th, 99.9th percentile request times
+  (Alan Woodward, Shawn Heisey, Adrien Grand)
+  
+* SOLR-4051: Add <propertyWriter /> element to DIH's data-config.xml file,  
+  allowing the user to specify the location, filename and Locale for
+  the "data-config.properties" file.  Alternatively, users can specify their
+  own property writer implementation for greater control. This new configuration 
+  element is optional, and defaults mimic prior behavior.  The one exception is 
+  that the "root" locale is the default. Previously it was the machine's default locale.
+  (James Dyer)
+
+* SOLR-4084: Add FuzzyLookupFactory, which is like AnalyzingSuggester except that 
+  it can tolerate typos in the input.  (Areek Zillur via Robert Muir)
+
+* SOLR-4088: New and improved auto host detection strategy for SolrCloud.
+  (Raintung Li via Mark Miller)
+
+* SOLR-3970: SystemInfoHandler now exposes more details about the 
+  JRE/VM/Java version in use.  (hossman)
+
+* SOLR-4101: Add support for storing term offsets in the index via a
+  'storeOffsetsWithPositions' flag on field definitions in the schema.
+  (Tom Winch, Alan Woodward)
+
 Optimizations
 ----------------------
 
@@ -73,10 +106,16 @@ Optimizations
   
 * SOLR-3941: The "commitOnLeader" part of distributed recovery can use
   openSearcher=false. (Tomas Fernandez Lobbe via Mark Miller)
+  
+* SOLR-4063: Allow CoreContainer to load multiple SolrCores in parallel rather
+  than just serially. (Mark Miller)
 
 Bug Fixes
 ----------------------
 
+* SOLR-4007: Morfologik dictionaries not available in Solr field type
+  due to class loader lookup problems. (Lance Norskog, Dawid Weiss)
+
 * SOLR-3560: Handle different types of Exception Messages for Logging UI
   (steffkes)
 
@@ -129,6 +168,78 @@ Bug Fixes
   Directory outstanding, we need to wait for them to be released before closing.
   (Mark Miller)
 
+* SOLR-4005: If CoreContainer fails to register a created core, it should close it.
+  (Mark Miller)
+
+* SOLR-4009: OverseerCollectionProcessor is not resilient to many error conditions
+  and can stop running on errors. (Raintung Li, milesli, Mark Miller)
+
+* SOLR-4019: Log stack traces for 503/Service Unavailable SolrException if not
+  thrown by PingRequestHandler. Do not log exceptions if a user tries to view a
+  hidden file using ShowFileRequestHandler.  (Tomás Fernández Löbbe via James Dyer)
+
+* SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token.
+  (Tom Burton-West, Robert Muir)
+
+* SOLR-4031: Upgrade to Jetty 8.1.7 to fix a bug where, on very rare occasions,
+  the content of two concurrent requests gets mixed up. (Per Steffensen, yonik)
+
+* SOLR-4060: ReplicationHandler can try and do a snappull and open a new IndexWriter
+  after shutdown has already occurred, leaving an IndexWriter that is not closed.
+  (Mark Miller)
+
+* SOLR-4055: Fix a thread safety issue with the Collections API that could
+  cause actions to be targeted at the wrong SolrCores. 
+  (Raintung Li via Mark Miller)
+
+* SOLR-3993: If multiple SolrCores for a shard coexist on a node, on cluster
+  restart, leader election would stall until timeout, waiting to see all of 
+  the replicas come up. (Mark Miller, Alexey Kudinov)
+  
+* SOLR-2045: Databases that require a commit to be issued before closing a
+  non-read-only connection leak connections.  Also expanded the
+  SqlEntityProcessor test to sometimes use Derby as well as HSQLDB (Derby is
+  one db affected by this bug).  (Fenlor Sebastia, James Dyer)
+
+* SOLR-4064: When there is an unexpected exception while trying to run the new
+  leader process, the SolrCore will not correctly rejoin the election.
+  (Po Rui via Mark Miller)
+
+* SOLR-3989: SolrZkClient constructor dropped exception cause when throwing
+  a new RuntimeException. (Colin Bartolome, yonik)
+
+* SOLR-4036: field aliases in fl should not cause properties of target field
+  to be used. (Martin Koch, yonik)
+
+* SOLR-4003: The SolrZKClient clean method should not try and clear zk paths
+  that start with /zookeeper, as this can fail and stop the removal of
+  further nodes. (Mark Miller)
+
+* SOLR-4076: SolrQueryParser should run fuzzy terms through
+  MultiTermAwareComponents to ensure that (for example) a fuzzy query of
+  foobar~2 is equivalent to FooBar~2 on a field that includes lowercasing.
+  (yonik)
+
+* SOLR-4081: QueryParsing.toString, used during debugQuery=true, did not
+  correctly handle ExtendedQueries such as WrappedQuery
+  (used when cache=false), spatial queries, and frange queires.
+  (Eirik Lygre, yonik)
+
+* SOLR-3959: Ensure the internal comma separator of poly fields is escaped
+  for CSVResponseWriter.  (Areek Zillur via Robert Muir)
+  
+* SOLR-4075: A logical shard that has had all of its SolrCores unloaded should
+  be removed from the cluster state. (Mark Miller, Gilles Comeau)
+  
+* SOLR-4034: Check if a collection already exists before trying to create a
+  new one. (Po Rui, Mark Miller)
+
+* SOLR-4097: Race can cause NPE in logging line on first cluster state update.
+  (Mark Miller)
+  
+* SOLR-4099: Allow the collection api work queue to make forward progress even
+  when its watcher is not fired for some reason. (Raintung Li via Mark Miller)
+
 Other Changes
 ----------------------
 
@@ -143,6 +254,30 @@ Other Changes
 
 * SOLR-3932: SolrCmdDistributorTest either takes 3 seconds or 3 minutes.
   (yonik, Mark Miller)  
+  
+* SOLR-3856: New tests for SqlEntityProcessor/CachedSqlEntityProcessor 
+  (James Dyer)
+
+* SOLR-4067: ZkStateReader#getLeaderProps should not return props for a leader
+  that it does not think is live. (Mark Miller)
+  
+* SOLR-4086: DIH refactor of VariableResolver and Evaluator.  VariableResolver
+  and each built-in Evaluator are separate concrete classes.  DateFormatEvaluator
+  now defaults with the ROOT Locale. However, users may specify a different 
+  Locale using an optional new third parameter. (James Dyer)
+
+* SOLR-3602: Update ZooKeeper to 3.4.5 (Mark Miller)
+
+* SOLR-4095: DIH NumberFormatTransformer & DateFormatTransformer default to the
+  ROOT Locale if none is specified.  These previously used the machine's default.
+  (James Dyer)
+  
+* SOLR-4096: DIH FileDataSource & FieldReaderDataSource default to UTF-8 encoding
+  if none is specified.  These previously used the machine's default.
+  (James Dyer)
+  
+* SOLR-1916: DIH to not use Lucene-forbidden Java APIs 
+  (default encoding, locale, etc.) (James Dyer, Robert Muir)
 
 ==================  4.0.0 ==================
 

Modified: lucene/dev/branches/LUCENE-2878/solr/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/solr/build.xml?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/solr/build.xml (original)
+++ lucene/dev/branches/LUCENE-2878/solr/build.xml Fri Nov 23 12:00:32 2012
@@ -203,9 +203,7 @@
         <include name="executors.txt" />
       </apiFileSet>
       <fileset dir="${basedir}/build">
-        <include name="**/*.class" />
-        <!-- exclude DIH for now as it is broken with Locales and Encodings: SOLR-1916 -->
-        <exclude name="contrib/solr-dataimporthandler*/**" />
+        <include name="**/*.class" />        
       </fileset>
     </forbidden-apis>
   </target>
@@ -526,6 +524,15 @@
   <!-- TODO: also integrate checkJavaDocs.py, which does more checks -->
   <target name="javadocs-lint" depends="javadocs">
     <check-broken-links dir="build/docs"/>
+    <!-- prevent the modules without problems from getting worse -->
+    <check-missing-javadocs dir="${javadoc.dir}/solr-analysis-extras" level="package"/>
+    <check-missing-javadocs dir="${javadoc.dir}/solr-cell" level="package"/>
+    <check-missing-javadocs dir="${javadoc.dir}/solr-clustering" level="package"/>
+    <!-- solr-core: problems: -->
+    <check-missing-javadocs dir="${javadoc.dir}/solr-dataimporthandler" level="package"/>
+    <check-missing-javadocs dir="${javadoc.dir}/solr-dataimporthandler-extras" level="package"/>
+    <check-missing-javadocs dir="${javadoc.dir}/solr-solrj" level="package"/>
+    <check-missing-javadocs dir="${javadoc.dir}/solr-test-framework" level="package"/>
   </target>
  
   <target name="generate-maven-artifacts" depends="install-maven-tasks">

Modified: lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java (original)
+++ lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java Fri Nov 23 12:00:32 2012
@@ -85,7 +85,7 @@ public class MailEntityProcessor extends
     String s = getStringFromContext("fetchMailsSince", null);
     if (s != null)
       try {
-        fetchMailsSince = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(s);
+        fetchMailsSince = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT).parse(s);
       } catch (ParseException e) {
         throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Invalid value for fetchMailSince: " + s, e);
       }

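As background: this is the SOLR-1916 pattern applied across DIH in this commit,
always pinning an explicit Locale rather than relying on the JVM default. A
minimal sketch (illustrative date value):

    // Locale-pinned parsing: behaves identically on every machine.
    Date since = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT)
        .parse("2012-11-23 12:00:32");
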
Modified: lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContextImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContextImpl.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContextImpl.java (original)
+++ lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ContextImpl.java Fri Nov 23 12:00:32 2012
@@ -38,7 +38,7 @@ public class ContextImpl extends Context
 
   private ContextImpl parent;
 
-  private VariableResolverImpl resolver;
+  private VariableResolver resolver;
 
   private DataSource ds;
 
@@ -55,7 +55,7 @@ public class ContextImpl extends Context
   DocBuilder docBuilder;
 
 
-  public ContextImpl(EntityProcessorWrapper epw, VariableResolverImpl resolver,
+  public ContextImpl(EntityProcessorWrapper epw, VariableResolver resolver,
                      DataSource ds, String currProcess,
                      Map<String, Object> global, ContextImpl parentContext, DocBuilder docBuilder) {
     this.epw = epw;

Modified: lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImporter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImporter.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImporter.java (original)
+++ lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DataImporter.java Fri Nov 23 12:00:32 2012
@@ -29,10 +29,15 @@ import org.apache.solr.handler.dataimpor
 import org.apache.solr.handler.dataimport.config.ConfigParseUtil;
 import org.apache.solr.handler.dataimport.config.DIHConfiguration;
 import org.apache.solr.handler.dataimport.config.Entity;
+import org.apache.solr.handler.dataimport.config.PropertyWriter;
 import org.apache.solr.handler.dataimport.config.Script;
 
 import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
 import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
+import static org.apache.solr.handler.dataimport.DocBuilder.loadClass;
+import static org.apache.solr.handler.dataimport.config.ConfigNameConstants.CLASS;
+import static org.apache.solr.handler.dataimport.config.ConfigNameConstants.NAME;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
@@ -78,7 +83,6 @@ public class DataImporter {
   public DocBuilder.Statistics cumulativeStatistics = new DocBuilder.Statistics();
   private SolrCore core;  
   private Map<String, Object> coreScopeSession = new ConcurrentHashMap<String,Object>();
-  private DIHPropertiesWriter propWriter;
   private ReentrantLock importLock = new ReentrantLock();
   private boolean isDeltaImportSupported = false;  
   private final String handlerName;  
@@ -88,8 +92,6 @@ public class DataImporter {
    * Only for testing purposes
    */
   DataImporter() {
-    createPropertyWriter();
-    propWriter.init(this);
     this.handlerName = "dataimport" ;
   }
   
@@ -97,19 +99,10 @@ public class DataImporter {
     this.handlerName = handlerName;
     this.core = core;
     this.schema = core.getSchema();
-    loadSchemaFieldMap();
-    createPropertyWriter();    
+    loadSchemaFieldMap();   
   }
   
-  private void createPropertyWriter() {
-    if (this.core == null
-        || !this.core.getCoreDescriptor().getCoreContainer().isZooKeeperAware()) {
-      propWriter = new SimplePropertiesWriter();
-    } else {
-      propWriter = new ZKPropertiesWriter();
-    }
-    propWriter.init(this);
-  }
+  
 
   
   boolean maybeReloadConfiguration(RequestInfo params,
@@ -278,7 +271,7 @@ public class DataImporter {
         }
       }
     }
-    List<Element> dataSourceTags = ConfigParseUtil.getChildNodes(e, DATA_SRC);
+    List<Element> dataSourceTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.DATA_SRC);
     if (!dataSourceTags.isEmpty()) {
       for (Element element : dataSourceTags) {
         Map<String,String> p = new HashMap<String,String>();
@@ -295,7 +288,54 @@ public class DataImporter {
         break;        
       } 
     }
-    return new DIHConfiguration(documentTags.get(0), this, functions, script, dataSources);
+    PropertyWriter pw = null;
+    List<Element> propertyWriterTags = ConfigParseUtil.getChildNodes(e, ConfigNameConstants.PROPERTY_WRITER);
+    if (propertyWriterTags.isEmpty()) {
+      boolean zookeeper = false;
+      if (this.core != null
+          && this.core.getCoreDescriptor().getCoreContainer()
+              .isZooKeeperAware()) {
+        zookeeper = true;
+      }
+      pw = new PropertyWriter(zookeeper ? "ZKPropertiesWriter"
+          : "SimplePropertiesWriter", Collections.<String,String> emptyMap());
+    } else if (propertyWriterTags.size() > 1) {
+      throw new DataImportHandlerException(SEVERE, "Only one "
+          + ConfigNameConstants.PROPERTY_WRITER + " can be configured.");
+    } else {
+      Element pwElement = propertyWriterTags.get(0);
+      String type = null;
+      Map<String,String> params = new HashMap<String,String>();
+      for (Map.Entry<String,String> entry : ConfigParseUtil.getAllAttributes(
+          pwElement).entrySet()) {
+        if (TYPE.equals(entry.getKey())) {
+          type = entry.getValue();
+        } else {
+          params.put(entry.getKey(), entry.getValue());
+        }
+      }
+      if (type == null) {
+        throw new DataImportHandlerException(SEVERE, "The "
+            + ConfigNameConstants.PROPERTY_WRITER + " element must specify "
+            + TYPE);
+      }
+      pw = new PropertyWriter(type, params);
+    }
+    return new DIHConfiguration(documentTags.get(0), this, functions, script, dataSources, pw);
+  }
+    
+  @SuppressWarnings("unchecked")
+  private DIHProperties createPropertyWriter() {
+    DIHProperties propWriter = null;
+    PropertyWriter configPw = config.getPropertyWriter();
+    try {
+      Class<DIHProperties> writerClass = DocBuilder.loadClass(configPw.getType(), this.core);
+      propWriter = writerClass.newInstance();
+      propWriter.init(this, configPw.getParameters());
+    } catch (Exception e) {
+      throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Unable to load PropertyWriter implementation:" + configPw.getType(), e);
+    }
+    return propWriter;
   }
 
   DIHConfiguration getConfig() {
@@ -374,11 +414,11 @@ public class DataImporter {
     LOG.info("Starting Full Import");
     setStatus(Status.RUNNING_FULL_DUMP);
 
-    setIndexStartTime(new Date());
-
     try {
-      docBuilder = new DocBuilder(this, writer, propWriter, requestParams);
-      checkWritablePersistFile(writer);
+      DIHProperties dihPropWriter = createPropertyWriter();
+      setIndexStartTime(dihPropWriter.getCurrentTimestamp());
+      docBuilder = new DocBuilder(this, writer, dihPropWriter, requestParams);
+      checkWritablePersistFile(writer, dihPropWriter);
       docBuilder.execute();
       if (!requestParams.isDebug())
         cumulativeStatistics.add(docBuilder.importStatistics);
@@ -392,10 +432,8 @@ public class DataImporter {
 
   }
 
-  private void checkWritablePersistFile(SolrWriter writer) {
-//    File persistFile = propWriter.getPersistFile();
-//    boolean isWritable = persistFile.exists() ? persistFile.canWrite() : persistFile.getParentFile().canWrite();
-    if (isDeltaImportSupported && !propWriter.isWritable()) {
+  private void checkWritablePersistFile(SolrWriter writer, DIHProperties dihPropWriter) {
+   if (isDeltaImportSupported && !dihPropWriter.isWritable()) {
       throw new DataImportHandlerException(SEVERE,
           "Properties is not writable. Delta imports are supported by data config but will not work.");
     }
@@ -406,9 +444,10 @@ public class DataImporter {
     setStatus(Status.RUNNING_DELTA_DUMP);
 
     try {
-      setIndexStartTime(new Date());
-      docBuilder = new DocBuilder(this, writer, propWriter, requestParams);
-      checkWritablePersistFile(writer);
+      DIHProperties dihPropWriter = createPropertyWriter();
+      setIndexStartTime(dihPropWriter.getCurrentTimestamp());
+      docBuilder = new DocBuilder(this, writer, dihPropWriter, requestParams);
+      checkWritablePersistFile(writer, dihPropWriter);
       docBuilder.execute();
       if (!requestParams.isDebug())
         cumulativeStatistics.add(docBuilder.importStatistics);
@@ -476,6 +515,30 @@ public class DataImporter {
   DocBuilder getDocBuilder() {
     return docBuilder;
   }
+  
+  Map<String, Evaluator> getEvaluators() {
+    return getEvaluators(config.getFunctions());
+  }
+  
+  /**
+   * used by tests.
+   */
+  Map<String, Evaluator> getEvaluators(List<Map<String,String>> fn) {
+    Map<String, Evaluator> evaluators = new HashMap<String, Evaluator>();
+    evaluators.put(Evaluator.DATE_FORMAT_EVALUATOR, new DateFormatEvaluator());
+    evaluators.put(Evaluator.SQL_ESCAPE_EVALUATOR, new SqlEscapingEvaluator());
+    evaluators.put(Evaluator.URL_ENCODE_EVALUATOR, new UrlEvaluator());
+    evaluators.put(Evaluator.ESCAPE_SOLR_QUERY_CHARS, new SolrQueryEscapingEvaluator());
+    SolrCore core = docBuilder == null ? null : docBuilder.dataImporter.getCore();
+    for (Map<String, String> map : fn) {
+      try {
+        evaluators.put(map.get(NAME), (Evaluator) loadClass(map.get(CLASS), core).newInstance());
+      } catch (Exception e) {
+        wrapAndThrow(SEVERE, e, "Unable to instantiate evaluator: " + map.get(CLASS));
+      }
+    }
+    return evaluators;    
+  }
 
   static final ThreadLocal<AtomicLong> QUERY_COUNT = new ThreadLocal<AtomicLong>() {
     @Override
@@ -484,12 +547,7 @@ public class DataImporter {
     }
   };
 
-  static final ThreadLocal<SimpleDateFormat> DATE_TIME_FORMAT = new ThreadLocal<SimpleDateFormat>() {
-    @Override
-    protected SimpleDateFormat initialValue() {
-      return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
-    }
-  };
+  
 
   static final class MSG {
     public static final String NO_CONFIG_FOUND = "Configuration not found";
@@ -563,4 +621,5 @@ public class DataImporter {
   public static final String RELOAD_CONF_CMD = "reload-config";
 
   public static final String SHOW_CONF_CMD = "show-config";
+  
 }

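As background: the parsing code added above reads an optional <propertyWriter/>
element from data-config.xml. The "type" attribute is required and names the
DIHProperties implementation; every other attribute is passed through to the
writer's init() as a parameter. An illustrative (not normative) configuration,
with attribute names based on the SOLR-4051 description of location, filename
and Locale:

    <dataConfig>
      <!-- Optional; omitting it mimics prior behavior (SimplePropertiesWriter,
           or ZKPropertiesWriter when ZooKeeper-aware). -->
      <propertyWriter type="SimplePropertiesWriter"
                      directory="conf"
                      filename="dataimport.properties"
                      locale="en_US" />
    </dataConfig>
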
Modified: lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java (original)
+++ lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java Fri Nov 23 12:00:32 2012
@@ -49,7 +49,7 @@ public class DateFormatTransformer exten
   public Object transformRow(Map<String, Object> aRow, Context context) {
 
     for (Map<String, String> map : context.getAllEntityFields()) {
-      Locale locale = Locale.getDefault();
+      Locale locale = Locale.ROOT;
       String customLocale = map.get("locale");
       if(customLocale != null){
         locale = new Locale(customLocale);
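
The DateFormatTransformer hunk above pins the fallback to Locale.ROOT, so parsing no longer depends on the JVM default locale; a per-field locale can still be supplied through the "locale" attribute. A small self-contained demo of the failure mode being avoided (class name and dates are illustrative):

    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.Locale;

    public class LocaleDemo {
      public static void main(String[] args) {
        Date now = new Date();
        // The same pattern renders month names differently per locale, so a
        // value written under one default locale may not re-parse under
        // another; a fixed locale makes the round trip deterministic.
        System.out.println(new SimpleDateFormat("dd MMM yyyy", Locale.FRENCH).format(now));
        System.out.println(new SimpleDateFormat("dd MMM yyyy", Locale.ROOT).format(now));
      }
    }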

Modified: lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugLogger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugLogger.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugLogger.java (original)
+++ lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DebugLogger.java Fri Nov 23 12:00:32 2012
@@ -22,6 +22,7 @@ import java.io.PrintWriter;
 import java.io.StringWriter;
 import java.text.MessageFormat;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Properties;
 import java.util.Stack;
@@ -50,7 +51,7 @@ class DebugLogger {
   private static final String LINE = "---------------------------------------------";
 
   private MessageFormat fmt = new MessageFormat(
-          "----------- row #{0}-------------");
+          "----------- row #{0}-------------", Locale.ROOT);
 
   boolean enabled = true;
 

Modified: lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java (original)
+++ lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java Fri Nov 23 12:00:32 2012
@@ -31,6 +31,7 @@ import org.apache.solr.schema.SchemaFiel
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.text.SimpleDateFormat;
 import java.util.*;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicLong;
@@ -67,17 +68,17 @@ public class DocBuilder {
   Map<String, Object> session = new HashMap<String, Object>();
 
   static final ThreadLocal<DocBuilder> INSTANCE = new ThreadLocal<DocBuilder>();
-  private Map<String, Object> functionsNamespace;
-  private Properties persistedProperties;
+  //private Map<String, Object> functionsNamespace;
+  private Map<String, Object> persistedProperties;
   
-  private DIHPropertiesWriter propWriter;
+  private DIHProperties propWriter;
   private static final String PARAM_WRITER_IMPL = "writerImpl";
   private static final String DEFAULT_WRITER_NAME = "SolrWriter";
   private DebugLogger debugLogger;
   private final RequestInfo reqParams;
   
   @SuppressWarnings("unchecked")
-  public DocBuilder(DataImporter dataImporter, SolrWriter solrWriter, DIHPropertiesWriter propWriter, RequestInfo reqParams) {
+  public DocBuilder(DataImporter dataImporter, SolrWriter solrWriter, DIHProperties propWriter, RequestInfo reqParams) {
     INSTANCE.set(this);
     this.dataImporter = dataImporter;
     this.reqParams = reqParams;
@@ -85,8 +86,7 @@ public class DocBuilder {
     DataImporter.QUERY_COUNT.set(importStatistics.queryCount);
     verboseDebug = reqParams.isDebug() && reqParams.getDebugInfo().verbose;
     persistedProperties = propWriter.readIndexerProperties();
-    functionsNamespace = EvaluatorBag.getFunctionsNamespace(this.dataImporter.getConfig().getFunctions(), this, getVariableResolver());
-    
+     
     String writerClassStr = null;
     if(reqParams!=null && reqParams.getRawParams() != null) {
       writerClassStr = (String) reqParams.getRawParams().get(PARAM_WRITER_IMPL);
@@ -113,30 +113,32 @@ public class DocBuilder {
     return debugLogger;
   }
 
-  public VariableResolverImpl getVariableResolver() {
+  private VariableResolver getVariableResolver() {
     try {
-      VariableResolverImpl resolver = null;
+      VariableResolver resolver = null;
       if(dataImporter != null && dataImporter.getCore() != null
           && dataImporter.getCore().getResourceLoader().getCoreProperties() != null){
-        resolver =  new VariableResolverImpl(dataImporter.getCore().getResourceLoader().getCoreProperties());
-      } else resolver = new VariableResolverImpl();
+        resolver =  new VariableResolver(dataImporter.getCore().getResourceLoader().getCoreProperties());
+      } else {
+        resolver = new VariableResolver();
+      }
+      resolver.setEvaluators(dataImporter.getEvaluators());
       Map<String, Object> indexerNamespace = new HashMap<String, Object>();
-      if (persistedProperties.getProperty(LAST_INDEX_TIME) != null) {
-        indexerNamespace.put(LAST_INDEX_TIME, persistedProperties.getProperty(LAST_INDEX_TIME));
+      if (persistedProperties.get(LAST_INDEX_TIME) != null) {
+        indexerNamespace.put(LAST_INDEX_TIME, persistedProperties.get(LAST_INDEX_TIME));
       } else  {
         // set epoch
-        indexerNamespace.put(LAST_INDEX_TIME, DataImporter.DATE_TIME_FORMAT.get().format(EPOCH));
+        indexerNamespace.put(LAST_INDEX_TIME, EPOCH);
       }
       indexerNamespace.put(INDEX_START_TIME, dataImporter.getIndexStartTime());
       indexerNamespace.put("request", reqParams.getRawParams());
-      indexerNamespace.put("functions", functionsNamespace);
       for (Entity entity : dataImporter.getConfig().getEntities()) {
         String key = entity.getName() + "." + SolrWriter.LAST_INDEX_KEY;
-        String lastIndex = persistedProperties.getProperty(key);
-        if (lastIndex != null) {
+        Object lastIndex = persistedProperties.get(key);
+        if (lastIndex != null && lastIndex instanceof Date) {
           indexerNamespace.put(key, lastIndex);
         } else  {
-          indexerNamespace.put(key, DataImporter.DATE_TIME_FORMAT.get().format(EPOCH));
+          indexerNamespace.put(key, EPOCH);
         }
       }
       resolver.addNamespace(ConfigNameConstants.IMPORTER_NS_SHORT, indexerNamespace);
@@ -149,12 +151,6 @@ public class DocBuilder {
     }
   }
   
-  private Map<String,Object> getFunctionsNamespace() {
-    if(functionsNamespace==null) {
-      
-    }
-    return functionsNamespace;
-  }
 
   private void invokeEventListener(String className) {
     try {
@@ -206,9 +202,8 @@ public class DocBuilder {
       }
       AtomicBoolean fullCleanDone = new AtomicBoolean(false);
       //we must not do a delete of *:* multiple times if there are multiple root entities to be run
-      Properties lastIndexTimeProps = new Properties();
-      lastIndexTimeProps.setProperty(LAST_INDEX_KEY,
-              DataImporter.DATE_TIME_FORMAT.get().format(dataImporter.getIndexStartTime()));
+      Map<String,Object> lastIndexTimeProps = new HashMap<String,Object>();
+      lastIndexTimeProps.put(LAST_INDEX_KEY, dataImporter.getIndexStartTime());
 
       epwList = new ArrayList<EntityProcessorWrapper>(config.getEntities().size());
       for (Entity e : config.getEntities()) {
@@ -217,8 +212,7 @@ public class DocBuilder {
       for (EntityProcessorWrapper epw : epwList) {
         if (entities != null && !entities.contains(epw.getEntity().getName()))
           continue;
-        lastIndexTimeProps.setProperty(epw.getEntity().getName() + "." + LAST_INDEX_KEY,
-                DataImporter.DATE_TIME_FORMAT.get().format(new Date()));
+        lastIndexTimeProps.put(epw.getEntity().getName() + "." + LAST_INDEX_KEY, propWriter.getCurrentTimestamp());
         currentEntityProcessorWrapper = epw;
         String delQuery = epw.getEntity().getAllAttributes().get("preImportDeleteQuery");
         if (dataImporter.getStatus() == DataImporter.Status.RUNNING_DELTA_DUMP) {
@@ -243,7 +237,7 @@ public class DocBuilder {
 
       if (stop.get()) {
         // Dont commit if aborted using command=abort
-        statusMessages.put("Aborted", DataImporter.DATE_TIME_FORMAT.get().format(new Date()));
+        statusMessages.put("Aborted", new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT).format(new Date()));
         rollback();
       } else {
         // Do not commit unnecessarily if this is a delta-import and no documents were created or deleted
@@ -295,7 +289,7 @@ public class DocBuilder {
   }
 
   @SuppressWarnings("unchecked")
-  private void finish(Properties lastIndexTimeProps) {
+  private void finish(Map<String,Object> lastIndexTimeProps) {
     LOG.info("Import completed successfully");
     statusMessages.put("", "Indexing completed. Added/Updated: "
             + importStatistics.docCount + " documents. Deleted "
@@ -329,7 +323,7 @@ public class DocBuilder {
   @SuppressWarnings("unchecked")
   private void doDelta() {
     addStatusMessage("Delta Dump started");
-    VariableResolverImpl resolver = getVariableResolver();
+    VariableResolver resolver = getVariableResolver();
 
     if (config.getDeleteQuery() != null) {
       writer.deleteByQuery(config.getDeleteQuery());
@@ -352,7 +346,7 @@ public class DocBuilder {
     writer.setDeltaKeys(allPks);
 
     statusMessages.put("Total Changed Documents", allPks.size());
-    VariableResolverImpl vri = getVariableResolver();
+    VariableResolver vri = getVariableResolver();
     Iterator<Map<String, Object>> pkIter = allPks.iterator();
     while (pkIter.hasNext()) {
       Map<String, Object> map = pkIter.next();
@@ -392,7 +386,7 @@ public class DocBuilder {
   
   @SuppressWarnings("unchecked")
   public void addStatusMessage(String msg) {
-    statusMessages.put(msg, DataImporter.DATE_TIME_FORMAT.get().format(new Date()));
+    statusMessages.put(msg, new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT).format(new Date()));
   }
 
   private void resetEntity(EntityProcessorWrapper epw) {
@@ -403,7 +397,7 @@ public class DocBuilder {
     
   }
   
-  private void buildDocument(VariableResolverImpl vr, DocWrapper doc,
+  private void buildDocument(VariableResolver vr, DocWrapper doc,
       Map<String,Object> pk, EntityProcessorWrapper epw, boolean isRoot,
       ContextImpl parentCtx) {
     List<EntityProcessorWrapper> entitiesToDestroy = new ArrayList<EntityProcessorWrapper>();
@@ -420,7 +414,7 @@ public class DocBuilder {
   }
 
   @SuppressWarnings("unchecked")
-  private void buildDocument(VariableResolverImpl vr, DocWrapper doc,
+  private void buildDocument(VariableResolver vr, DocWrapper doc,
                              Map<String, Object> pk, EntityProcessorWrapper epw, boolean isRoot,
                              ContextImpl parentCtx, List<EntityProcessorWrapper> entitiesToDestroy) {
 
@@ -707,24 +701,32 @@ public class DocBuilder {
   }
 
   private String findMatchingPkColumn(String pk, Map<String, Object> row) {
-    if (row.containsKey(pk))
-      throw new IllegalArgumentException(
-        String.format("deltaQuery returned a row with null for primary key %s", pk));
+    if (row.containsKey(pk)) {
+      throw new IllegalArgumentException(String.format(Locale.ROOT,
+          "deltaQuery returned a row with null for primary key %s", pk));
+    }
     String resolvedPk = null;
     for (String columnName : row.keySet()) {
       if (columnName.endsWith("." + pk) || pk.endsWith("." + columnName)) {
         if (resolvedPk != null)
           throw new IllegalArgumentException(
-            String.format(
+            String.format(Locale.ROOT, 
               "deltaQuery has more than one column (%s and %s) that might resolve to declared primary key pk='%s'",
               resolvedPk, columnName, pk));
         resolvedPk = columnName;
       }
     }
-    if (resolvedPk == null)
+    if (resolvedPk == null) {
       throw new IllegalArgumentException(
-        String.format("deltaQuery has no column to resolve to declared primary key pk='%s'", pk));
-    LOG.info(String.format("Resolving deltaQuery column '%s' to match entity's declared pk '%s'", resolvedPk, pk));
+          String
+              .format(
+                  Locale.ROOT,
+                  "deltaQuery has no column to resolve to declared primary key pk='%s'",
+                  pk));
+    }
+    LOG.info(String.format(Locale.ROOT,
+        "Resolving deltaQuery column '%s' to match entity's declared pk '%s'",
+        resolvedPk, pk));
     return resolvedPk;
   }
 
@@ -736,7 +738,7 @@ public class DocBuilder {
    * @return an iterator to the list of keys for which Solr documents should be updated.
    */
   @SuppressWarnings("unchecked")
-  public Set<Map<String, Object>> collectDelta(EntityProcessorWrapper epw, VariableResolverImpl resolver,
+  public Set<Map<String, Object>> collectDelta(EntityProcessorWrapper epw, VariableResolver resolver,
                                                Set<Map<String, Object>> deletedRows) {
     //someone called abort
     if (stop.get())
@@ -818,14 +820,16 @@ public class DocBuilder {
       // identifying deleted rows with deltas
 
       for (Map<String, Object> row : myModifiedPks) {
-        getModifiedParentRows(resolver.addNamespace(epw.getEntity().getName(), row), epw.getEntity().getName(), epw, parentKeyList);
+        resolver.addNamespace(epw.getEntity().getName(), row);
+        getModifiedParentRows(resolver, epw.getEntity().getName(), epw, parentKeyList);
         // check for abort
         if (stop.get())
           return new HashSet();
       }
       // running the same for deletedrows
       for (Map<String, Object> row : deletedSet) {
-        getModifiedParentRows(resolver.addNamespace(epw.getEntity().getName(), row), epw.getEntity().getName(), epw, parentKeyList);
+        resolver.addNamespace(epw.getEntity().getName(), row);
+        getModifiedParentRows(resolver, epw.getEntity().getName(), epw, parentKeyList);
         // check for abort
         if (stop.get())
           return new HashSet();
@@ -840,7 +844,7 @@ public class DocBuilder {
         myModifiedPks : new HashSet<Map<String, Object>>(parentKeyList);
   }
 
-  private void getModifiedParentRows(VariableResolverImpl resolver,
+  private void getModifiedParentRows(VariableResolver resolver,
                                      String entity, EntityProcessor entityProcessor,
                                      Set<Map<String, Object>> parentKeyList) {
     try {
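
Two themes run through the DocBuilder changes above: persisted last-index values now travel as Date objects rather than pre-formatted strings, and the shared ThreadLocal<SimpleDateFormat> is retired in favor of short-lived, locale-pinned instances. A minimal sketch of the replacement pattern, mirroring the addStatusMessage change (the demo class is illustrative):

    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.Locale;

    public class StampDemo {
      // SimpleDateFormat is not thread-safe; instead of sharing one instance
      // behind a ThreadLocal, create a cheap per-call instance with an
      // explicit locale so timestamps are identical across threads and JVMs.
      static String stamp() {
        return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT)
            .format(new Date());
      }

      public static void main(String[] args) {
        System.out.println(stamp());
      }
    }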

Modified: lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java (original)
+++ lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java Fri Nov 23 12:00:32 2012
@@ -48,7 +48,7 @@ public class EntityProcessorWrapper exte
   private boolean initalized;
   private String onError;
   private Context context;
-  private VariableResolverImpl resolver;
+  private VariableResolver resolver;
   private String entityName;
 
   protected List<Transformer> transformers;
@@ -65,7 +65,7 @@ public class EntityProcessorWrapper exte
   public void init(Context context) {
     rowcache = null;
     this.context = context;
-    resolver = (VariableResolverImpl) context.getVariableResolver();
+    resolver = (VariableResolver) context.getVariableResolver();
     if (entityName == null) {
       onError = resolver.replaceTokens(context.getEntityAttribute(ON_ERROR));
       if (onError == null) onError = ABORT;
@@ -171,7 +171,7 @@ public class EntityProcessorWrapper exte
     Map<String, Object> transformedRow = row;
     List<Map<String, Object>> rows = null;
     boolean stopTransform = checkStopTransform(row);
-    VariableResolverImpl resolver = (VariableResolverImpl) context.getVariableResolver();
+    VariableResolver resolver = (VariableResolver) context.getVariableResolver();
     for (Transformer t : transformers) {
       if (stopTransform) break;
       try {
@@ -288,8 +288,8 @@ public class EntityProcessorWrapper exte
     delegate.destroy();
   }
 
-  public VariableResolverImpl getVariableResolver() {
-    return (VariableResolverImpl) context.getVariableResolver();
+  public VariableResolver getVariableResolver() {
+    return (VariableResolver) context.getVariableResolver();
   }
 
   public Context getContext() {

Modified: lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Evaluator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Evaluator.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Evaluator.java (original)
+++ lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Evaluator.java Fri Nov 23 12:00:32 2012
@@ -16,6 +16,18 @@
  */
 package org.apache.solr.handler.dataimport;
 
+import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
+import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
+
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.Locale;
+import java.util.TimeZone;
+import java.util.regex.Pattern;
+
+import org.apache.solr.util.DateMathParser;
+
 /**
  * <p>
  * Pluggable functions for resolving variables
@@ -43,4 +55,88 @@ public abstract class Evaluator {
    * @return the value of the given expression evaluated using the resolver
    */
   public abstract String evaluate(String expression, Context context);
+  
+  /**
+   * Parses an expression string into separate params. The values are separated by commas; each value is
+   * translated into one of the following:
+   * <ol>
+   * <li>If it is in single quotes the value is translated to a String</li>
+   * <li>If it is not in quotes and is a number it is translated into a Double</li>
+   * <li>Otherwise it is treated as a variable to be resolved and is added as an instance of VariableWrapper</li>
+   * </ol>
+   *
+   * @param expression the expression to be parsed
+   * @param vr the VariableResolver instance for resolving variables
+   *
+   * @return a List of objects which can either be a string, number or a variable wrapper
+   */
+  List<Object> parseParams(String expression, VariableResolver vr) {
+    List<Object> result = new ArrayList<Object>();
+    expression = expression.trim();
+    String[] ss = expression.split(",");
+    for (int i = 0; i < ss.length; i++) {
+      ss[i] = ss[i].trim();
+      if (ss[i].startsWith("'")) {//a string param has started
+        StringBuilder sb = new StringBuilder();
+        while (true) {
+          sb.append(ss[i]);
+          if (ss[i].endsWith("'")) break;
+          i++;
+          if (i >= ss.length)
+            throw new DataImportHandlerException(SEVERE, "invalid string at " + ss[i - 1] + " in function params: " + expression);
+          sb.append(",");
+        }
+        String s = sb.substring(1, sb.length() - 1);
+        s = s.replaceAll("\\\\'", "'");
+        result.add(s);
+      } else {
+        if (Character.isDigit(ss[i].charAt(0))) {
+          try {
+            Double doub = Double.parseDouble(ss[i]);
+            result.add(doub);
+          } catch (NumberFormatException e) {
+            if (vr.resolve(ss[i]) == null) {
+              wrapAndThrow(
+                      SEVERE, e, "Invalid number: " + ss[i] +
+                              " in parameters: " + expression);
+            }
+          }
+        } else {
+          result.add(new VariableWrapper(ss[i], vr));
+        }
+      }
+    }
+    return result;
+  }
+
+  static class VariableWrapper {
+    String varName;
+    VariableResolver vr;
+
+    public VariableWrapper(String s, VariableResolver vr) {
+      this.varName = s;
+      this.vr = vr;
+    }
+
+    public Object resolve() {
+      return vr.resolve(varName);
+
+    }
+
+    @Override
+    public String toString() {
+      Object o = vr.resolve(varName);
+      return o == null ? null : o.toString();
+    }
+  }
+
+  static Pattern IN_SINGLE_QUOTES = Pattern.compile("^'(.*?)'$");
+  
+  public static final String DATE_FORMAT_EVALUATOR = "formatDate";
+
+  public static final String URL_ENCODE_EVALUATOR = "encodeUrl";
+
+  public static final String ESCAPE_SOLR_QUERY_CHARS = "escapeQueryChars";
+
+  public static final String SQL_ESCAPE_EVALUATOR = "escapeSql";
 }
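
To make the contract of the new parseParams helper concrete, a worked trace under a hypothetical input (vr stands for a VariableResolver, and the call site is assumed to be an Evaluator subclass in this package, since parseParams is package-private):

    // parseParams("'a,b', dataimporter.last_index_time, 7", vr) returns:
    //
    //   "a,b"             single-quoted -> String; quotes stripped, and the
    //                     embedded comma survives because the loop rejoins
    //                     split segments until it sees the closing quote
    //   VariableWrapper   wraps "dataimporter.last_index_time"; resolved
    //                     lazily through vr.resolve() when first used
    //   7.0               unquoted token starting with a digit -> Double
    List<Object> params = parseParams("'a,b', dataimporter.last_index_time, 7", vr);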

Modified: lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java (original)
+++ lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java Fri Nov 23 12:00:32 2012
@@ -106,7 +106,7 @@ public class FieldReaderDataSource exten
   private Reader getReader(Blob blob)
           throws SQLException, UnsupportedEncodingException {
     if (encoding == null) {
-      return (new InputStreamReader(blob.getBinaryStream()));
+      return (new InputStreamReader(blob.getBinaryStream(), "UTF-8"));
     } else {
       return (new InputStreamReader(blob.getBinaryStream(), encoding));
     }

Modified: lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java (original)
+++ lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java Fri Nov 23 12:00:32 2012
@@ -125,7 +125,7 @@ public class FileDataSource extends Data
   protected Reader openStream(File file) throws FileNotFoundException,
           UnsupportedEncodingException {
     if (encoding == null) {
-      return new InputStreamReader(new FileInputStream(file));
+      return new InputStreamReader(new FileInputStream(file), "UTF-8");
     } else {
       return new InputStreamReader(new FileInputStream(file), encoding);
     }
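
This hunk and the FieldReaderDataSource one above make the same fix: when no encoding is configured, the reader now decodes as UTF-8 instead of the platform default. A short self-contained sketch of the difference (the file name is illustrative):

    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.io.Reader;

    public class ReaderDemo {
      public static void main(String[] args) throws IOException {
        // new InputStreamReader(in) decodes with file.encoding (for example
        // windows-1252), silently mangling UTF-8 bytes on such platforms;
        // naming the charset makes the unconfigured case deterministic.
        Reader r = new InputStreamReader(new FileInputStream("data.xml"), "UTF-8");
        r.close();
      }
    }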

Modified: lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java (original)
+++ lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java Fri Nov 23 12:00:32 2012
@@ -19,6 +19,7 @@ package org.apache.solr.handler.dataimpo
 import java.io.File;
 import java.io.FilenameFilter;
 import java.text.ParseException;
+import java.text.SimpleDateFormat;
 import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -151,19 +152,19 @@ public class FileListEntityProcessor ext
     } else  {
       dateStr = context.replaceTokens(dateStr);
     }
-    m = EvaluatorBag.IN_SINGLE_QUOTES.matcher(dateStr);
+    m = Evaluator.IN_SINGLE_QUOTES.matcher(dateStr);
     if (m.find()) {
       String expr = null;
       expr = m.group(1).replaceAll("NOW", "");
       try {
-        return EvaluatorBag.dateMathParser.parseMath(expr);
+        return DateFormatEvaluator.getDateMathParser(Locale.ROOT).parseMath(expr);
       } catch (ParseException exp) {
         throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
                 "Invalid expression for date", exp);
       }
     }
     try {
-      return DataImporter.DATE_TIME_FORMAT.get().parse(dateStr);
+      return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT).parse(dateStr);
     } catch (ParseException exp) {
       throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
               "Invalid expression for date", exp);

Modified: lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/JdbcDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/JdbcDataSource.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/JdbcDataSource.java (original)
+++ lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/JdbcDataSource.java Fri Nov 23 12:00:32 2012
@@ -397,6 +397,12 @@ public class JdbcDataSource extends
   private void closeConnection()  {
     try {
       if (conn != null) {
+        try {
+          //SOLR-2045
+          conn.commit();
+        } catch(Exception ex) {
+          //ignore.
+        }
         conn.close();
       }
     } catch (Exception e) {
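
On the SOLR-2045 hunk above: per the referenced issue, closing a connection that still has an open transaction can fail or leave locks behind with some drivers when autocommit is off, so a best-effort commit is issued first and any failure is deliberately swallowed, since the connection is being discarded anyway. A variant sketch with an explicit autocommit guard (the guard is an illustration, not part of this commit):

    // Inside closeConnection(), assuming conn != null as in the code above:
    try {
      if (!conn.getAutoCommit()) {
        conn.commit(); // SOLR-2045: flush any open transaction before close
      }
    } catch (Exception ex) {
      // best-effort; the connection is about to be closed regardless
    }
    conn.close();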

Modified: lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java?rev=1412849&r1=1412848&r2=1412849&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java (original)
+++ lucene/dev/branches/LUCENE-2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java Fri Nov 23 12:00:32 2012
@@ -68,7 +68,7 @@ public class NumberFormatTransformer ext
             throw new DataImportHandlerException(DataImportHandlerException.SEVERE, "Invalid Locale specified for field: " + fld);
           }
         } else {
-          locale = Locale.getDefault();
+          locale = Locale.ROOT;
         }
 
         Object val = row.get(srcCol);
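
As in DateFormatTransformer, the default here becomes Locale.ROOT so unconfigured number parsing is reproducible across machines; an explicit per-field locale is still honored. A short self-contained demo of why the default matters (class name and values are illustrative):

    import java.text.NumberFormat;
    import java.util.Locale;

    public class NumberLocaleDemo {
      public static void main(String[] args) throws Exception {
        String raw = "1.234,56";
        // German conventions read this as 1234.56; ROOT/English grouping stops
        // at the comma and yields 1.234, so relying on the platform default
        // silently changes results between machines.
        System.out.println(NumberFormat.getNumberInstance(Locale.GERMANY).parse(raw));
        System.out.println(NumberFormat.getNumberInstance(Locale.ROOT).parse(raw));
      }
    }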