You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/02/09 02:04:13 UTC

svn commit: r1068718 [12/21] - in /lucene/dev/branches/bulkpostings: ./ dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/ dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/maven/lucene/contrib/ant/ dev-tools/maven/lucene/contrib/db/bdb-j...

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java Wed Feb  9 01:03:49 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.benchmark.byTa
 
 import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.text.DateFormat;
@@ -29,8 +29,8 @@ import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.Locale;
-import java.util.zip.GZIPInputStream;
 
+import org.apache.lucene.benchmark.byTask.feeds.TrecDocParser.ParsePathType;
 import org.apache.lucene.benchmark.byTask.utils.Config;
 import org.apache.lucene.benchmark.byTask.utils.StringBuilderReader;
 import org.apache.lucene.util.ThreadInterruptedException;
@@ -46,8 +46,10 @@ import org.apache.lucene.util.ThreadInte
  * <li><b>docs.dir</b> - specifies the directory where the TREC files reside.
  * Can be set to a relative path if "work.dir" is also specified
  * (<b>default=trec</b>).
+ * <li><b>trec.doc.parser</b> - specifies the {@link TrecDocParser} class to use for
+ * parsing the TREC documents content (<b>default=TrecGov2Parser</b>).
  * <li><b>html.parser</b> - specifies the {@link HTMLParser} class to use for
- * parsing the TREC documents content (<b>default=DemoHTMLParser</b>).
+ * parsing the HTML parts of the TREC documents content (<b>default=DemoHTMLParser</b>).
  * <li><b>content.source.encoding</b> - if not specified, ISO-8859-1 is used.
  * <li><b>content.source.excludeIteration</b> - if true, do not append iteration number to docname
  * </ul>
@@ -59,22 +61,24 @@ public class TrecContentSource extends C
     ParsePosition pos;
   }
 
-  private static final String DATE = "Date: ";
-  private static final String DOCHDR = "<DOCHDR>";
-  private static final String TERMINATING_DOCHDR = "</DOCHDR>";
-  private static final String DOCNO = "<DOCNO>";
-  private static final String TERMINATING_DOCNO = "</DOCNO>";
-  private static final String DOC = "<DOC>";
-  private static final String TERMINATING_DOC = "</DOC>";
+  public static final String DOCNO = "<DOCNO>";
+  public static final String TERMINATING_DOCNO = "</DOCNO>";
+  public static final String DOC = "<DOC>";
+  public static final String TERMINATING_DOC = "</DOC>";
 
-  private static final String NEW_LINE = System.getProperty("line.separator");
+  /** separator between lines in the buffer */
+  public static final String NEW_LINE = System.getProperty("line.separator");
 
   private static final String DATE_FORMATS [] = {
-       "EEE, dd MMM yyyy kk:mm:ss z",	  // Tue, 09 Dec 2003 22:39:08 GMT
-       "EEE MMM dd kk:mm:ss yyyy z",  	// Tue Dec 09 16:45:08 2003 EST
-       "EEE, dd-MMM-':'y kk:mm:ss z", 	// Tue, 09 Dec 2003 22:39:08 GMT
-       "EEE, dd-MMM-yyy kk:mm:ss z", 	  // Tue, 09 Dec 2003 22:39:08 GMT
-       "EEE MMM dd kk:mm:ss yyyy",  	  // Tue Dec 09 16:45:08 2003
+       "EEE, dd MMM yyyy kk:mm:ss z",   // Tue, 09 Dec 2003 22:39:08 GMT
+       "EEE MMM dd kk:mm:ss yyyy z",    // Tue Dec 09 16:45:08 2003 EST
+       "EEE, dd-MMM-':'y kk:mm:ss z",   // Tue, 09 Dec 2003 22:39:08 GMT
+       "EEE, dd-MMM-yyy kk:mm:ss z",    // Tue, 09 Dec 2003 22:39:08 GMT
+       "EEE MMM dd kk:mm:ss yyyy",      // Tue Dec 09 16:45:08 2003
+       "dd MMM yyyy",                   // 1 March 1994
+       "MMM dd, yyyy",                  // February 3, 1994
+       "yyMMdd",                        // 910513
+       "hhmm z.z.z. MMM dd, yyyy",       // 0901 u.t.c. April 28, 1994
   };
 
   private ThreadLocal<DateFormatInfo> dateFormats = new ThreadLocal<DateFormatInfo>();
@@ -83,7 +87,7 @@ public class TrecContentSource extends C
   private File dataDir = null;
   private ArrayList<File> inputFiles = new ArrayList<File>();
   private int nextFile = 0;
-  private int rawDocSize;
+  private int rawDocSize = 0;
 
   // Use to synchronize threads on reading from the TREC documents.
   private Object lock = new Object();
@@ -92,7 +96,10 @@ public class TrecContentSource extends C
   BufferedReader reader;
   int iteration = 0;
   HTMLParser htmlParser;
+  
   private boolean excludeDocnameIteration;
+  private TrecDocParser trecDocParser = new TrecGov2Parser(); // default
+  ParsePathType currPathType; // not private for tests
   
   private DateFormatInfo getDateFormatInfo() {
     DateFormatInfo dfi = dateFormats.get();
@@ -118,7 +125,7 @@ public class TrecContentSource extends C
     return sb;
   }
   
-  private Reader getTrecDocReader(StringBuilder docBuffer) {
+  Reader getTrecDocReader(StringBuilder docBuffer) {
     StringBuilderReader r = trecDocReader.get();
     if (r == null) {
       r = new StringBuilderReader(docBuffer);
@@ -129,10 +136,21 @@ public class TrecContentSource extends C
     return r;
   }
 
-  // read until finding a line that starts with the specified prefix, or a terminating tag has been found.
-  private void read(StringBuilder buf, String prefix, boolean collectMatchLine,
-                    boolean collectAll, String terminatingTag)
-      throws IOException, NoMoreDataException {
+  HTMLParser getHtmlParser() {
+    return htmlParser;
+  }
+  
+  /**
+   * Read until a line starting with the specified <code>lineStart</code>.
+   * @param buf buffer for collecting the data, if so specified.
+   * @param lineStart line start to look for, must not be null.
+   * @param collectMatchLine whether to collect the matching line into <code>buf</code>.
+   * @param collectAll whether to collect all lines into <code>buf</code>.
+   * @throws IOException
+   * @throws NoMoreDataException
+   */
+   private void read(StringBuilder buf, String lineStart, 
+       boolean collectMatchLine, boolean collectAll) throws IOException, NoMoreDataException {
     String sep = "";
     while (true) {
       String line = reader.readLine();
@@ -144,20 +162,12 @@ public class TrecContentSource extends C
 
       rawDocSize += line.length();
 
-      if (line.startsWith(prefix)) {
+      if (lineStart!=null && line.startsWith(lineStart)) {
         if (collectMatchLine) {
           buf.append(sep).append(line);
           sep = NEW_LINE;
         }
-        break;
-      }
-
-      if (terminatingTag != null && line.startsWith(terminatingTag)) {
-        // didn't find the prefix that was asked, but the terminating
-        // tag was found. set the length to 0 to signal no match was
-        // found.
-        buf.setLength(0);
-        break;
+        return;
       }
 
       if (collectAll) {
@@ -169,7 +179,7 @@ public class TrecContentSource extends C
   
   void openNextFile() throws NoMoreDataException, IOException {
     close();
-    int retries = 0;
+    currPathType = null;
     while (true) {
       if (nextFile >= inputFiles.size()) { 
         // exhausted files, start a new round, unless forever set to false.
@@ -184,13 +194,13 @@ public class TrecContentSource extends C
         System.out.println("opening: " + f + " length: " + f.length());
       }
       try {
-        GZIPInputStream zis = new GZIPInputStream(new FileInputStream(f), BUFFER_SIZE);
-        reader = new BufferedReader(new InputStreamReader(zis, encoding), BUFFER_SIZE);
+        InputStream inputStream = getInputStream(f); // support either gzip, bzip2, or regular text file, by extension  
+        reader = new BufferedReader(new InputStreamReader(inputStream, encoding), BUFFER_SIZE);
+        currPathType = TrecDocParser.pathType(f);
         return;
       } catch (Exception e) {
-        retries++;
-        if (retries < 20 && verbose) {
-          System.out.println("Skipping 'bad' file " + f.getAbsolutePath() + "  #retries=" + retries);
+        if (verbose) {
+          System.out.println("Skipping 'bad' file " + f.getAbsolutePath()+" due to "+e.getMessage());
           continue;
         }
         throw new NoMoreDataException();
@@ -198,7 +208,7 @@ public class TrecContentSource extends C
     }
   }
 
-  Date parseDate(String dateStr) {
+  public Date parseDate(String dateStr) {
     dateStr = dateStr.trim();
     DateFormatInfo dfi = getDateFormatInfo();
     for (int i = 0; i < dfi.dfs.length; i++) {
@@ -237,70 +247,47 @@ public class TrecContentSource extends C
 
   @Override
   public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
-    String dateStr = null, name = null;
-    Reader r = null;
+    String name = null;
+    StringBuilder docBuf = getDocBuffer();
+    ParsePathType parsedPathType;
+    
     // protect reading from the TREC files by multiple threads. The rest of the
-    // method, i.e., parsing the content and returning the DocData can run
-    // unprotected.
+    // method, i.e., parsing the content and returning the DocData can run unprotected.
     synchronized (lock) {
       if (reader == null) {
         openNextFile();
       }
-
-      StringBuilder docBuf = getDocBuffer();
       
-      // 1. skip until doc start
+      // 1. skip until doc start - required for all TREC formats
       docBuf.setLength(0);
-      read(docBuf, DOC, false, false, null);
-
-      // 2. name
+      read(docBuf, DOC, false, false);
+      
+      // save the current path type for passing to trecDocParser after the sync
+      // block, in case another thread opens another file in between.
+      parsedPathType = currPathType;
+      
+      // 2. name - required for all TREC formats
       docBuf.setLength(0);
-      read(docBuf, DOCNO, true, false, null);
+      read(docBuf, DOCNO, true, false);
       name = docBuf.substring(DOCNO.length(), docBuf.indexOf(TERMINATING_DOCNO,
-          DOCNO.length()));
-      if (!excludeDocnameIteration)
+          DOCNO.length())).trim();
+      
+      if (!excludeDocnameIteration) {
         name = name + "_" + iteration;
-
-      // 3. skip until doc header
-      docBuf.setLength(0);
-      read(docBuf, DOCHDR, false, false, null);
-
-      boolean findTerminatingDocHdr = false;
-
-      // 4. date - look for the date only until /DOCHDR
-      docBuf.setLength(0);
-      read(docBuf, DATE, true, false, TERMINATING_DOCHDR);
-      if (docBuf.length() != 0) {
-        // Date found.
-        dateStr = docBuf.substring(DATE.length());
-        findTerminatingDocHdr = true;
       }
 
-      // 5. skip until end of doc header
-      if (findTerminatingDocHdr) {
-        docBuf.setLength(0);
-        read(docBuf, TERMINATING_DOCHDR, false, false, null);
-      }
-
-      // 6. collect until end of doc
+      // 3. read all until end of doc
       docBuf.setLength(0);
-      read(docBuf, TERMINATING_DOC, false, true, null);
-      
-      // 7. Set up a Reader over the read content
-      r = getTrecDocReader(docBuf);
-      // Resetting the thread's reader means it will reuse the instance
-      // allocated as well as re-read from docBuf.
-      r.reset();
-      
-      // count char length of parsed html text (larger than the plain doc body text).
-      addBytes(docBuf.length()); 
+      read(docBuf, TERMINATING_DOC, false, true);
     }
+      
+    // count char length of text to be parsed (may be larger than the resulting plain doc body text).
+    addBytes(docBuf.length()); 
 
     // This code segment relies on HtmlParser being thread safe. When we get 
     // here, everything else is already private to that thread, so we're safe.
-    Date date = dateStr != null ? parseDate(dateStr) : null;
     try {
-      docData = htmlParser.parse(docData, name, date, r, null);
+      docData = trecDocParser.parse(docData, name, this, docBuf, parsedPathType);
       addDoc();
     } catch (InterruptedException ie) {
       throw new ThreadInterruptedException(ie);
@@ -322,27 +309,40 @@ public class TrecContentSource extends C
   @Override
   public void setConfig(Config config) {
     super.setConfig(config);
+    // dirs
     File workDir = new File(config.get("work.dir", "work"));
     String d = config.get("docs.dir", "trec");
     dataDir = new File(d);
     if (!dataDir.isAbsolute()) {
       dataDir = new File(workDir, d);
     }
+    // files
     collectFiles(dataDir, inputFiles);
     if (inputFiles.size() == 0) {
       throw new IllegalArgumentException("No files in dataDir: " + dataDir);
     }
+    // trec doc parser
+    try {
+      String trecDocParserClassName = config.get("trec.doc.parser", "org.apache.lucene.benchmark.byTask.feeds.TrecGov2Parser");
+      trecDocParser = Class.forName(trecDocParserClassName).asSubclass(TrecDocParser.class).newInstance();
+    } catch (Exception e) {
+      // Should not get here. Throw runtime exception.
+      throw new RuntimeException(e);
+    }
+    // html parser
     try {
-      String parserClassName = config.get("html.parser",
+      String htmlParserClassName = config.get("html.parser",
           "org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser");
-      htmlParser = Class.forName(parserClassName).asSubclass(HTMLParser.class).newInstance();
+      htmlParser = Class.forName(htmlParserClassName).asSubclass(HTMLParser.class).newInstance();
     } catch (Exception e) {
       // Should not get here. Throw runtime exception.
       throw new RuntimeException(e);
     }
+    // encoding
     if (encoding == null) {
       encoding = "ISO-8859-1";
     }
+    // iteration exclusion in doc name 
     excludeDocnameIteration = config.get("content.source.excludeIteration", false);
   }
 

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java Wed Feb  9 01:03:49 2011
@@ -59,7 +59,7 @@ public class NearRealtimeReaderTask exte
     }
     
     long t = System.currentTimeMillis();
-    IndexReader r = IndexReader.open(w);
+    IndexReader r = IndexReader.open(w, true);
     runData.setIndexReader(r);
     // Transfer our reference to runData
     r.decRef();

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java Wed Feb  9 01:03:49 2011
@@ -158,8 +158,10 @@ public class StringBuilderReader extends
     synchronized (lock) {
       this.sb = sb;
       length = sb.length();
+      next = mark = 0;
     }
   }
+  
   @Override
   public long skip(long ns) throws IOException {
     synchronized (lock) {

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java Wed Feb  9 01:03:49 2011
@@ -28,7 +28,7 @@ import org.apache.lucene.search.TopDocs;
 /**
  * Create a log ready for submission.
  * Extend this class and override
- * {@link #report(QualityQuery, TopDocs, String, Searcher)}
+ * {@link #report(QualityQuery, TopDocs, String, IndexSearcher)}
  * to create different reports. 
  */
 public class SubmissionReport {

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java Wed Feb  9 01:03:49 2011
@@ -29,146 +29,119 @@ import java.util.regex.Pattern;
 /**
  * Split the Reuters SGML documents into Simple Text files containing: Title, Date, Dateline, Body
  */
-public class ExtractReuters
-{
-    private File reutersDir;
-    private File outputDir;
-    private static final String LINE_SEPARATOR = System.getProperty("line.separator");
-
-    public ExtractReuters(File reutersDir, File outputDir)
-    {
-        this.reutersDir = reutersDir;
-        this.outputDir = outputDir;
-        System.out.println("Deleting all files in " + outputDir);
-        File [] files = outputDir.listFiles();
-        for (int i = 0; i < files.length; i++)
-        {
-            files[i].delete();
-        }
-
+public class ExtractReuters {
+  private File reutersDir;
+  private File outputDir;
+  private static final String LINE_SEPARATOR = System.getProperty("line.separator");
+
+  public ExtractReuters(File reutersDir, File outputDir) {
+    this.reutersDir = reutersDir;
+    this.outputDir = outputDir;
+    System.out.println("Deleting all files in " + outputDir);
+    for (File f : outputDir.listFiles()) {
+      f.delete();
     }
+  }
 
-    public void extract()
-    {
-        File [] sgmFiles = reutersDir.listFiles(new FileFilter()
-        {
-            public boolean accept(File file)
-            {
-                return file.getName().endsWith(".sgm");
-            }
-        });
-        if (sgmFiles != null && sgmFiles.length > 0)
-        {
-            for (int i = 0; i < sgmFiles.length; i++)
-            {
-                File sgmFile = sgmFiles[i];
-                extractFile(sgmFile);
-            }
-        }
-        else
-        {
-            System.err.println("No .sgm files in " + reutersDir);
-        }
+  public void extract() {
+    File[] sgmFiles = reutersDir.listFiles(new FileFilter() {
+      public boolean accept(File file) {
+        return file.getName().endsWith(".sgm");
+      }
+    });
+    if (sgmFiles != null && sgmFiles.length > 0) {
+      for (File sgmFile : sgmFiles) {
+        extractFile(sgmFile);
+      }
+    } else {
+      System.err.println("No .sgm files in " + reutersDir);
     }
+  }
 
-    Pattern EXTRACTION_PATTERN = Pattern.compile("<TITLE>(.*?)</TITLE>|<DATE>(.*?)</DATE>|<BODY>(.*?)</BODY>");
-
-    private static String[] META_CHARS
-            = {"&", "<", ">", "\"", "'"};
+  Pattern EXTRACTION_PATTERN = Pattern
+      .compile("<TITLE>(.*?)</TITLE>|<DATE>(.*?)</DATE>|<BODY>(.*?)</BODY>");
 
-    private static String[] META_CHARS_SERIALIZATIONS
-            = {"&amp;", "&lt;", "&gt;", "&quot;", "&apos;"};
+  private static String[] META_CHARS = { "&", "<", ">", "\"", "'" };
 
-    /**
-     * Override if you wish to change what is extracted
-     *
-     * @param sgmFile
-     */
-    protected void extractFile(File sgmFile)
-    {
-        try
-        {
-            BufferedReader reader = new BufferedReader(new FileReader(sgmFile));
-
-            StringBuilder buffer = new StringBuilder(1024);
-            StringBuilder outBuffer = new StringBuilder(1024);
-
-            String line = null;
-            int docNumber = 0;
-            while ((line = reader.readLine()) != null)
-            {
-                //when we see a closing reuters tag, flush the file
-
-                if (line.indexOf("</REUTERS") == -1) {
-                    //Replace the SGM escape sequences
-
-                    buffer.append(line).append(' ');//accumulate the strings for now, then apply regular expression to get the pieces,
-                }
-                else
-                {
-                    //Extract the relevant pieces and write to a file in the output dir
-                    Matcher matcher = EXTRACTION_PATTERN.matcher(buffer);
-                    while (matcher.find())
-                    {
-                        for (int i = 1; i <= matcher.groupCount(); i++)
-                        {
-                            if (matcher.group(i) != null)
-                            {
-                                outBuffer.append(matcher.group(i));
-                            }
-                        }
-                        outBuffer.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-                    }
-                    String out = outBuffer.toString();
-                    for (int i = 0; i < META_CHARS_SERIALIZATIONS.length; i++)
-                    {
-                        out = out.replaceAll(META_CHARS_SERIALIZATIONS[i], META_CHARS[i]);
-                    }
-                    File outFile = new File(outputDir, sgmFile.getName() + "-" + (docNumber++) + ".txt");
-                    //System.out.println("Writing " + outFile);
-                    FileWriter writer = new FileWriter(outFile);
-                    writer.write(out);
-                    writer.close();
-                    outBuffer.setLength(0);
-                    buffer.setLength(0);
-                }
+  private static String[] META_CHARS_SERIALIZATIONS = { "&amp;", "&lt;",
+      "&gt;", "&quot;", "&apos;" };
+
+  /**
+   * Override if you wish to change what is extracted
+   * 
+   * @param sgmFile
+   */
+  protected void extractFile(File sgmFile) {
+    try {
+      BufferedReader reader = new BufferedReader(new FileReader(sgmFile));
+
+      StringBuilder buffer = new StringBuilder(1024);
+      StringBuilder outBuffer = new StringBuilder(1024);
+
+      String line = null;
+      int docNumber = 0;
+      while ((line = reader.readLine()) != null) {
+        // when we see a closing reuters tag, flush the file
+
+        if (line.indexOf("</REUTERS") == -1) {
+          // Replace the SGM escape sequences
+
+          buffer.append(line).append(' ');// accumulate the strings for now,
+                                          // then apply regular expression to
+                                          // get the pieces,
+        } else {
+          // Extract the relevant pieces and write to a file in the output dir
+          Matcher matcher = EXTRACTION_PATTERN.matcher(buffer);
+          while (matcher.find()) {
+            for (int i = 1; i <= matcher.groupCount(); i++) {
+              if (matcher.group(i) != null) {
+                outBuffer.append(matcher.group(i));
+              }
             }
-            reader.close();
-        }
-
-        catch (
-                IOException e
-                )
-
-        {
-            throw new RuntimeException(e);
-        }
+            outBuffer.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
+          }
+          String out = outBuffer.toString();
+          for (int i = 0; i < META_CHARS_SERIALIZATIONS.length; i++) {
+            out = out.replaceAll(META_CHARS_SERIALIZATIONS[i], META_CHARS[i]);
+          }
+          File outFile = new File(outputDir, sgmFile.getName() + "-"
+              + (docNumber++) + ".txt");
+          // System.out.println("Writing " + outFile);
+          FileWriter writer = new FileWriter(outFile);
+          writer.write(out);
+          writer.close();
+          outBuffer.setLength(0);
+          buffer.setLength(0);
+        }
+      }
+      reader.close();
+    } catch (IOException e) {
+      throw new RuntimeException(e);
     }
+  }
 
-
-    public static void main(String[] args)
-    {
-        if (args.length != 2)
-        {
-            printUsage();
-        }
-        File reutersDir = new File(args[0]);
-
-        if (reutersDir.exists())
-        {
-            File outputDir = new File(args[1]);
-            outputDir.mkdirs();
-            ExtractReuters extractor = new ExtractReuters(reutersDir, outputDir);
-            extractor.extract();
-        }
-        else
-        {
-            printUsage();
-        }
+  public static void main(String[] args) {
+    if (args.length != 2) {
+      printUsage();
     }
-
-    private static void printUsage()
-    {
-        System.err.println("Usage: java -cp <...> org.apache.lucene.benchmark.utils.ExtractReuters <Path to Reuters SGM files> <Output Path>");
+    File reutersDir = new File(args[0]);
+    if (!reutersDir.exists()) {
+      printUsage();
+      return;
     }
+    
+    // First, extract to a tmp directory and only if everything succeeds, rename
+    // to output directory.
+    File outputDir = new File(args[1] + "-tmp");
+    outputDir.mkdirs();
+    ExtractReuters extractor = new ExtractReuters(reutersDir, outputDir);
+    extractor.extract();
+    // Now rename to requested output dir
+    outputDir.renameTo(new File(args[1]));
+  }
+
+  private static void printUsage() {
+    System.err.println("Usage: java -cp <...> org.apache.lucene.benchmark.utils.ExtractReuters <Path to Reuters SGM files> <Output Path>");
+  }
+  
 }

Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/TrecContentSourceTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/TrecContentSourceTest.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/TrecContentSourceTest.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/TrecContentSourceTest.java Wed Feb  9 01:03:49 2011
@@ -18,14 +18,20 @@ package org.apache.lucene.benchmark.byTa
  */
 
 import java.io.BufferedReader;
+import java.io.File;
 import java.io.IOException;
 import java.io.StringReader;
 import java.text.ParseException;
+import java.util.Arrays;
 import java.util.Date;
+import java.util.HashSet;
+import java.util.Properties;
 
+import org.apache.lucene.benchmark.byTask.feeds.TrecDocParser.ParsePathType;
 import org.apache.lucene.benchmark.byTask.utils.Config;
 import org.apache.lucene.document.DateTools;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
 
 public class TrecContentSourceTest extends LuceneTestCase {
 
@@ -329,5 +335,62 @@ public class TrecContentSourceTest exten
     // Don't test that NoMoreDataException is thrown, since the forever flag is
     // turned on.
   }
+  
+  /** 
+   * Open a trec content source over a directory with files of all trec path types and all
+   * supported formats - bzip, gzip, txt. 
+   */
+  public void testTrecFeedDirAllTypes() throws Exception {
+    File dataDir =  _TestUtil.getTempDir("trecFeedAllTypes");
+    _TestUtil.unzip(getDataFile("trecdocs.zip"), dataDir);
+    TrecContentSource tcs = new TrecContentSource();
+    Properties props = new Properties();
+    props.setProperty("print.props", "false");
+    props.setProperty("content.source.verbose", "false");
+    props.setProperty("content.source.excludeIteration", "true");
+    props.setProperty("doc.maker.forever", "false");
+    props.setProperty("docs.dir", dataDir.getCanonicalPath().replace('\\','/')); 
+    props.setProperty("trec.doc.parser", TrecParserByPath.class.getName());
+    props.setProperty("content.source.forever", "false");
+    tcs.setConfig(new Config(props));
+    tcs.resetInputs();
+    DocData dd = new DocData();
+    int n = 0;
+    boolean gotExpectedException = false;
+    HashSet<ParsePathType> unseenTypes = new HashSet<ParsePathType>(Arrays.asList(ParsePathType.values()));
+    try {
+      while (n<100) { // arbitrary limit to prevent looping forever in case of test failure
+        dd = tcs.getNextDocData(dd);
+        ++n;
+        assertNotNull("doc data "+n+" should not be null!", dd);
+        unseenTypes.remove(tcs.currPathType);
+        switch(tcs.currPathType) {
+          case GOV2:
+            assertDocData(dd, "TEST-000", "TEST-000 title", "TEST-000 text", tcs.parseDate("Sun, 11 Jan 2009 08:00:00 GMT"));
+            break;
+          case FBIS:
+            assertDocData(dd, "TEST-001", "TEST-001 Title", "TEST-001 text", tcs.parseDate("1 January 1991"));
+            break;
+          case FR94:
+            // no title extraction in this source for now
+            assertDocData(dd, "TEST-002", null, "DEPARTMENT OF SOMETHING", tcs.parseDate("February 3, 1994"));
+            break;
+          case FT:
+            assertDocData(dd, "TEST-003", "Test-003 title", "Some pub text", tcs.parseDate("980424"));
+            break;
+          case LATIMES:
+            assertDocData(dd, "TEST-004", "Test-004 Title", "Some paragraph", tcs.parseDate("January 17, 1997, Sunday"));
+            break;
+          default:
+            assertTrue("Should never get here!", false);
+        }
+      }
+    } catch (NoMoreDataException e) {
+      gotExpectedException = true;
+    }
+    assertTrue("Should have gotten NoMoreDataException!", gotExpectedException);
+    assertEquals("Wrong numbre of documents created by osurce!",5,n);
+    assertTrue("Did not see all types!",unseenTypes.isEmpty());
+  }
 
 }

Modified: lucene/dev/branches/bulkpostings/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/CHANGES.txt?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/solr/CHANGES.txt Wed Feb  9 01:03:49 2011
@@ -53,6 +53,15 @@ Upgrading from Solr 3.1-dev
   legacy behavior should set a default value for the 'mm' param in
   their solrconfig.xml file.
 
+* In previous releases, sorting on fields that are "multiValued" 
+  (either by explicit declaration in schema.xml or by implicit behavior
+  because the "version" attribute on the schema was less than 1.2) did 
+  not generally work, but it would sometimes silently act as if it
+  succeeded and order the docs arbitrarily.  Solr will now fail on any
+  attempt to sort on a multivalued field.
+
+
+
 Detailed Change List
 ----------------------
 
@@ -106,8 +115,6 @@ New Features
   Adding a parameter NOW=<time_in_ms> to the request will override the
   current time.  (Peter Sturge, yonik)
 
-* SOLR-2325: Allow tagging and exlcusion of main query for faceting. (yonik)
-
 Optimizations
 ----------------------
 
@@ -163,6 +170,18 @@ Bug Fixes
 
 * SOLR-482: Provide more exception handling in CSVLoader (gsingers)
 
+* SOLR-2320: Fixed ReplicationHandler detail reporting for masters
+  (hossman)
+
+* SOLR-2085: Improve SolrJ behavior when FacetComponent comes before
+  QueryComponent (Tomas Salfischberger via hossman)
+
+* SOLR-1940: Fix SolrDispatchFilter behavior when Content-Type is
+  unknown (Lance Norskog and hossman)
+
+* SOLR-2339: Fix sorting to explicitly generate an error if you
+  attempt to sort on a multiValued field. (hossman)
+
 Other Changes
 ----------------------
 
@@ -184,6 +203,10 @@ Other Changes
   using Generics where applicable in method/object declatations, and
   adding @SuppressWarnings("unchecked") when appropriate (hossman)
 
+* SOLR-2350: Since Solr no longer requires XML files to be in UTF-8
+  (see SOLR-96) SimplePostTool (aka: post.jar) has been improved to
+  work with files of any mime-type or charset. (hossman)
+
 Documentation
 ----------------------
 
@@ -395,7 +418,6 @@ New Features
   	     http://wiki.apache.org/solr/SpatialSearch and the example.  Refactored some items in Lucene spatial. 
 	     Removed SpatialTileField as the underlying CartesianTier is broken beyond repair and is going to be moved. (gsingers)
 
-
 * SOLR-2128: Full parameter substitution for function queries.
   Example: q=add($v1,$v2)&v1=mul(popularity,5)&v2=20.0
   (yonik)
@@ -416,6 +438,15 @@ New Features
 * SOLR-2129: Added a Solr module for dynamic metadata extraction/indexing with Apache UIMA.
   See contrib/uima/README.txt for more information.  (Tommaso Teofili via rmuir)
 
+* SOLR-2325: Allow tagging and exclusion of main query for faceting. (yonik)
+
+* SOLR-2263: Add ability for RawResponseWriter to stream binary files as well as
+  text files.  (Eric Pugh via yonik)
+
+* SOLR-860: Add debug output for MoreLikeThis. (koji)
+
+* SOLR-1057: Add PathHierarchyTokenizerFactory. (ryan, koji)
+
 Optimizations
 ----------------------
 
@@ -513,7 +544,8 @@ Bug Fixes
 * SOLR-1711: SolrJ - StreamingUpdateSolrServer had a race condition that
   could halt the streaming of documents. The original patch to fix this
   (never officially released) introduced another hanging bug due to
-  connections not being released.  (Attila Babo, Erik Hetzner via yonik)
+  connections not being released.
+  (Attila Babo, Erik Hetzner, Johannes Tuchscherer via yonik)
   
 * SOLR-1748, SOLR-1747, SOLR-1746, SOLR-1745, SOLR-1744: Streams and Readers
   retrieved from ContentStreams are not closed in various places, resulting
@@ -637,6 +669,15 @@ Bug Fixes
 * SOLR-2261: fix velocity template layout.vm that referred to an older
   version of jquery.  (Eric Pugh via rmuir)
 
+* SOLR-1983: snappuller fails when modifiedConfFiles is not empty and
+  full copy of index is needed. (Alexander Kanarsky via yonik)
+
+* SOLR-2156: SnapPuller fails to clean Old Index Directories on Full Copy
+  (Jayendra Patil via yonik)
+
+* SOLR-96: Fix XML parsing in XMLUpdateRequestHandler and
+  DocumentAnalysisRequestHandler to respect charset from XML file and only
+  use HTTP header's "Content-Type" as a "hint". (Uwe Schindler)
 
 Other Changes
 ----------------------
@@ -730,6 +771,12 @@ Other Changes
   
 * SOLR-2213: Upgrade to jQuery 1.4.3 (Erick Erickson via ryan)
 
+* SOLR-1826: Add unit tests for highlighting with termOffsets=true
+  and overlapping tokens. (Stefan Oestreicher via rmuir)
+
+* SOLR-2340: Add version infos to message in JavaBinCodec when throwing
+  exception. (koji)
+
 
 Build
 ----------------------

Modified: lucene/dev/branches/bulkpostings/solr/NOTICE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/NOTICE.txt?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/NOTICE.txt (original)
+++ lucene/dev/branches/bulkpostings/solr/NOTICE.txt Wed Feb  9 01:03:49 2011
@@ -156,7 +156,6 @@ This product includes software developed
 
 See http://project.carrot2.org/
 
-
 =========================================================================
 ==     Guava Notice                                                    ==
 =========================================================================
@@ -168,6 +167,16 @@ This product includes software developed
 See http://code.google.com/p/guava-libraries/
 
 =========================================================================
+==     Prettify Notice                                                 ==
+=========================================================================
+
+Copyright ???? Google, Inc.
+
+This product includes software developed by the Google Prettify project.
+
+See http://code.google.com/p/google-code-prettify/
+
+=========================================================================
 ==     Jackson Notice                                                  ==
 =========================================================================
 Copyright ????

Modified: lucene/dev/branches/bulkpostings/solr/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/build.xml?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/build.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/build.xml Wed Feb  9 01:03:49 2011
@@ -18,6 +18,8 @@
 -->
 
 <project name="solr" default="usage" basedir="." xmlns:artifact="antlib:org.apache.maven.artifact.ant">
+
+  <property name="solr-path" value="." />
 	
   <import file="common-build.xml"/>
   
@@ -340,7 +342,7 @@
     <path refid="compile.classpath.solrj" />
     <pathelement location="${dest}/solr"/>
     <pathelement location="${dest}/solrj"/> <!-- include solrj -->
-    <pathelement location="${common-solr.dir}/../lucene/build/classes/test" />  <!-- include some lucene test code -->
+    <pathelement location="${common-solr.dir}/../lucene/build/classes/test-framework" />  <!-- include some lucene test code -->
   </path>
 
   <path id="test.run.classpath">
@@ -348,7 +350,7 @@
     <pathelement location="${dest}/tests"/>
     <!-- include the solrj classpath and jetty files included in example -->
     <path refid="compile.classpath.solrj" />
-    <pathelement location="${common-solr.dir}/../lucene/build/classes/test" />  <!-- include some lucene test code -->
+    <pathelement location="${common-solr.dir}/../lucene/build/classes/test-framework" />  <!-- include some lucene test code -->
     <pathelement path="${java.class.path}"/>
   </path>
 
@@ -956,6 +958,8 @@
     description="runs the tasks over src/java excluding the license directory">
     <rat:report xmlns:rat="antlib:org.apache.rat.anttasks">
       <fileset dir="src/java"/>
+      <fileset dir="src/test"/>
+      <fileset dir="src/webapp"/>
       <fileset dir="src/common"/>
       <fileset dir="src/solrj"/>
       <fileset dir="client">
@@ -963,6 +967,8 @@
       </fileset>
       <fileset dir="contrib/dataimporthandler/src/main/java"/>
       <fileset dir="contrib/dataimporthandler/src/test/java"/>
+      <fileset dir="contrib/dataimporthandler/src/extras/main/java"/>
+      <fileset dir="contrib/dataimporthandler/src/extras/test/java"/>
       <fileset dir="contrib/clustering/src/main/java"/>
       <fileset dir="contrib/clustering/src/test/java"/>
       <fileset dir="contrib/extraction/src/main/java"/>

Modified: lucene/dev/branches/bulkpostings/solr/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/common-build.xml?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/common-build.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/common-build.xml Wed Feb  9 01:03:49 2011
@@ -33,6 +33,9 @@
 
   <property name="name" value="${ant.project.name}"/>
 
+  <property name="dev-tools.dir" value="${solr-path}/../dev-tools"/>
+  <property name="prettify.dir" value="${dev-tools.dir}/prettify"/>
+
   <tstamp>
     <format property="year" pattern="yyyy"/>
     <format property="DSTAMP" pattern="yyyy-MM-dd"/>
@@ -344,6 +347,9 @@
     <attribute name="destdir"/>
   	<attribute name="title" default="${Name} ${version} API (${specversion})"/>
     <sequential>
+      <copy todir="@{destdir}/../prettify" overwrite="false">
+        <fileset dir="${prettify.dir}"/>
+      </copy>
       <javadoc
           packagenames="org.apache.solr.*"
           failonerror="true"
@@ -357,6 +363,7 @@
           link="${javadoc.link.java}"
           windowtitle="${Name} ${version} API"
           doctitle="@{title}"
+          stylesheetfile="@{destdir}/../prettify/stylesheet+prettify.css"
           bottom="Copyright &amp;copy; ${year} Apache Software Foundation.  All Rights Reserved.">
         <tag name="todo" description="To Do:"/>
         <tag name="uml.property" description="UML Property:"/>
@@ -368,6 +375,10 @@
         <link href="${javadoc.link.java}"/>
         <link href="${javadoc.link.junit}"/>
         <link href="${javadoc.link.lucene}"/>
+      	<header><![CDATA[
+      		 <script src="{@docRoot}/../prettify/prettify.js" type="text/javascript"></script>
+      		 <script language="JavaScript">window.onload=function(){windowTitle();prettyPrint();}</script>
+      	]]></header>
 
         <sources />
 

Modified: lucene/dev/branches/bulkpostings/solr/contrib/analysis-extras/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/analysis-extras/build.xml?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/analysis-extras/build.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/analysis-extras/build.xml Wed Feb  9 01:03:49 2011
@@ -73,7 +73,7 @@
     <pathelement path="${dest}/test-classes"/>
     <pathelement path="${java.class.path}"/>
     <pathelement location="${common-solr.dir}/build/tests"/> <!-- include solr test code -->
-    <pathelement location="${common-solr.dir}/../lucene/build/classes/test" />  <!-- include some lucene test code -->
+    <pathelement location="${common-solr.dir}/../lucene/build/classes/test-framework" />  <!-- include some lucene test code -->
     <path refid="common.classpath"/>
   </path>
 

Modified: lucene/dev/branches/bulkpostings/solr/contrib/clustering/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/clustering/build.xml?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/clustering/build.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/clustering/build.xml Wed Feb  9 01:03:49 2011
@@ -42,7 +42,7 @@
     <pathelement path="${dest}/test-classes"/>
     <pathelement path="${java.class.path}"/>
     <pathelement location="${common-solr.dir}/build/tests"/> <!-- include solr test code -->
-    <pathelement location="${common-solr.dir}/../lucene/build/classes/test" />  <!-- include some lucene test code -->
+    <pathelement location="${common-solr.dir}/../lucene/build/classes/test-framework" />  <!-- include some lucene test code -->
     <path refid="common.classpath"/>
     <!-- DistributedClusteringComponentTest uses Jetty -->
     <fileset dir="${solr-path}/example/lib">

Modified: lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java Wed Feb  9 01:03:49 2011
@@ -46,6 +46,13 @@ import java.util.Set;
  * <p/>
  * This engine is experimental.  Output from this engine is subject to change in future releases.
  *
+ * <pre class="prettyprint" >
+ * &lt;searchComponent class="org.apache.solr.handler.clustering.ClusteringComponent" name="clustering"&gt;
+ *   &lt;lst name="engine"&gt;
+ *     &lt;str name="name"&gt;default&lt;/str&gt;
+ *     &lt;str name="carrot.algorithm"&gt;org.carrot2.clustering.lingo.LingoClusteringAlgorithm&lt;/str&gt;
+ *   &lt;/lst&gt;
+ * &lt;/searchComponent&gt;</pre>
  */
 public class ClusteringComponent extends SearchComponent implements SolrCoreAware {
   private transient static Logger log = LoggerFactory.getLogger(ClusteringComponent.class);

Modified: lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java Wed Feb  9 01:03:49 2011
@@ -77,6 +77,7 @@ public class CarrotClusteringEngine exte
 
   private String idFieldName;
 
+  @Override
   @Deprecated
   public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) {
     SolrIndexSearcher searcher = sreq.getSearcher();
@@ -90,6 +91,7 @@ public class CarrotClusteringEngine exte
     }
   }
 
+  @Override
   public Object cluster(Query query, SolrDocumentList solrDocList,
       Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
     try {

Modified: lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/LuceneLanguageModelFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/LuceneLanguageModelFactory.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/LuceneLanguageModelFactory.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/LuceneLanguageModelFactory.java Wed Feb  9 01:03:49 2011
@@ -69,7 +69,8 @@ public class LuceneLanguageModelFactory 
 	/**
 	 * Provide an {@link IStemmer} implementation for a given language.
 	 */
-	protected IStemmer createStemmer(LanguageCode language) {
+	@Override
+  protected IStemmer createStemmer(LanguageCode language) {
 		switch (language) {
 		case ARABIC:
 			return ArabicStemmerFactory.createStemmer();

Modified: lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java Wed Feb  9 01:03:49 2011
@@ -25,11 +25,13 @@ import org.apache.solr.search.DocSet;
  *
  **/
 public class MockDocumentClusteringEngine extends DocumentClusteringEngine {
+  @Override
   public NamedList cluster(DocSet docs, SolrParams solrParams) {
     NamedList result = new NamedList();
     return result;
   }
 
+  @Override
   public NamedList cluster(SolrParams solrParams) {
     NamedList result = new NamedList();
     return result;

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/build.xml?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/build.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/build.xml Wed Feb  9 01:03:49 2011
@@ -56,7 +56,7 @@
 	  <pathelement path="target/classes" />
   	<pathelement path="target/test-classes" />
     <pathelement location="${solr-path}/build/tests"/> <!-- include solr test code -->
-    <pathelement location="${solr-path}/../lucene/build/classes/test" />  <!-- include some lucene test code -->
+    <pathelement location="${solr-path}/../lucene/build/classes/test-framework" />  <!-- include some lucene test code -->
     <pathelement path="${java.class.path}"/>
   </path>
 
@@ -68,7 +68,7 @@
   	<pathelement path="target/test-classes" />
   	<pathelement path="target/extras/test-classes" />
     <pathelement location="${solr-path}/build/tests"/> <!-- include solr test code -->
-    <pathelement location="${solr-path}/../lucene/build/classes/test" />  <!-- include some lucene test code -->
+    <pathelement location="${solr-path}/../lucene/build/classes/test-framework" />  <!-- include some lucene test code -->
     <pathelement path="${java.class.path}"/>
   </path>
 	

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java Wed Feb  9 01:03:49 2011
@@ -51,6 +51,7 @@ public class MailEntityProcessor extends
     public SearchTerm getCustomSearch(Folder folder);
   }
 
+  @Override
   public void init(Context context) {
     super.init(context);
     // set attributes using  XXX getXXXFromContext(attribute, defualtValue);
@@ -95,6 +96,7 @@ public class MailEntityProcessor extends
     logConfig();
   }
 
+  @Override
   public Map<String, Object> nextRow() {
     Message mail;
     Map<String, Object> row = null;

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java Wed Feb  9 01:03:49 2011
@@ -92,6 +92,7 @@ public class TikaEntityProcessor extends
     done = false;
   }
 
+  @Override
   public Map<String, Object> nextRow() {
     if(done) return null;
     Map<String, Object> row = new HashMap<String, Object>();

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java Wed Feb  9 01:03:49 2011
@@ -191,18 +191,22 @@ public class TestMailEntityProcessor ext
       super(null, ".", null);
     }
 
+    @Override
     public boolean upload(SolrInputDocument doc) {
       return docs.add(doc);
     }
 
+    @Override
     public void log(int event, String name, Object row) {
       // Do nothing
     }
 
+    @Override
     public void doDeleteAll() {
       deleteAllCalled = Boolean.TRUE;
     }
 
+    @Override
     public void commit(boolean b) {
       commitCalled = Boolean.TRUE;
     }

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java Wed Feb  9 01:03:49 2011
@@ -39,10 +39,12 @@ public class BinContentStreamDataSource 
   private InputStream in;
 
 
+  @Override
   public void init(Context context, Properties initProps) {
     this.context = (ContextImpl) context;
   }
 
+  @Override
   public InputStream getData(String query) {
      contentStream = context.getDocBuilder().requestParameters.contentStream;
     if (contentStream == null)
@@ -55,6 +57,7 @@ public class BinContentStreamDataSource 
     }
   }
 
+  @Override
   public void close() {
      if (contentStream != null) {
       try {

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinFileDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinFileDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinFileDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinFileDataSource.java Wed Feb  9 01:03:49 2011
@@ -43,10 +43,12 @@ import java.util.Properties;
 
 public class BinFileDataSource extends DataSource<InputStream>{
    protected String basePath;
+  @Override
   public void init(Context context, Properties initProps) {
      basePath = initProps.getProperty(FileDataSource.BASE_PATH);
   }
 
+  @Override
   public InputStream getData(String query) {
     File f = FileDataSource.getFile(basePath,query);
     try {
@@ -57,6 +59,7 @@ public class BinFileDataSource extends D
     }
   }
 
+  @Override
   public void close() {
 
   }

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinURLDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinURLDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinURLDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinURLDataSource.java Wed Feb  9 01:03:49 2011
@@ -49,6 +49,7 @@ public class BinURLDataSource extends Da
 
   public BinURLDataSource() { }
 
+  @Override
   public void init(Context context, Properties initProps) {
       this.context = context;
     this.initProps = initProps;
@@ -72,6 +73,7 @@ public class BinURLDataSource extends Da
     }
   }
 
+  @Override
   public InputStream getData(String query) {
     URL url = null;
     try {
@@ -89,6 +91,7 @@ public class BinURLDataSource extends Da
     }
   }
 
+  @Override
   public void close() { }
 
   private String getInitPropWithReplacements(String propertyName) {

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java Wed Feb  9 01:03:49 2011
@@ -38,6 +38,7 @@ import java.util.Map;
 public class CachedSqlEntityProcessor extends SqlEntityProcessor {
   private boolean isFirst;
 
+  @Override
   @SuppressWarnings("unchecked")
   public void init(Context context) {
     super.init(context);
@@ -45,6 +46,7 @@ public class CachedSqlEntityProcessor ex
     isFirst = true;
   }
 
+  @Override
   public Map<String, Object> nextRow() {
     if (dataSourceRowCache != null)
       return getFromRowCacheTransformed();
@@ -60,6 +62,7 @@ public class CachedSqlEntityProcessor ex
 
   }
 
+  @Override
   protected List<Map<String, Object>> getAllNonCachedRows() {
     List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
     String q = getQuery();

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java Wed Feb  9 01:03:49 2011
@@ -37,6 +37,7 @@ import java.util.Map;
  * @since solr 1.4
  */
 public class ClobTransformer extends Transformer {
+  @Override
   public Object transformRow(Map<String, Object> aRow, Context context) {
     for (Map<String, String> map : context.getAllEntityFields()) {
       if (!TRUE.equals(map.get(CLOB))) continue;

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java Wed Feb  9 01:03:49 2011
@@ -39,10 +39,12 @@ public class ContentStreamDataSource ext
   private ContentStream contentStream;
   private Reader reader;
 
+  @Override
   public void init(Context context, Properties initProps) {
     this.context = (ContextImpl) context;
   }
 
+  @Override
   public Reader getData(String query) {
     contentStream = context.getDocBuilder().requestParameters.contentStream;
     if (contentStream == null)
@@ -55,6 +57,7 @@ public class ContentStreamDataSource ext
     }
   }
 
+  @Override
   public void close() {
     if (contentStream != null) {
       try {

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java Wed Feb  9 01:03:49 2011
@@ -71,22 +71,27 @@ public class ContextImpl extends Context
     parent = parentContext;
   }
 
+  @Override
   public String getEntityAttribute(String name) {
     return entity == null ? null : entity.allAttributes.get(name);
   }
 
+  @Override
   public String getResolvedEntityAttribute(String name) {
     return entity == null ? null : resolver.replaceTokens(entity.allAttributes.get(name));
   }
 
+  @Override
   public List<Map<String, String>> getAllEntityFields() {
     return entity == null ? Collections.EMPTY_LIST : entity.allFieldsList;
   }
 
+  @Override
   public VariableResolver getVariableResolver() {
     return resolver;
   }
 
+  @Override
   public DataSource getDataSource() {
     if (ds != null) return ds;
     if(entity == null) return  null;
@@ -101,26 +106,32 @@ public class ContextImpl extends Context
     return entity.dataSrc;
   }
 
+  @Override
   public DataSource getDataSource(String name) {
     return dataImporter.getDataSourceInstance(entity, name, this);
   }
 
+  @Override
   public boolean isRootEntity() {
     return entity.isDocRoot;
   }
 
+  @Override
   public String currentProcess() {
     return currProcess;
   }
 
+  @Override
   public Map<String, Object> getRequestParameters() {
     return requestParams;
   }
 
+  @Override
   public EntityProcessor getEntityProcessor() {
     return entity == null ? null : entity.processor;
   }
 
+  @Override
   public void setSessionAttribute(String name, Object val, String scope) {
     if(name == null) return;
     if (Context.SCOPE_ENTITY.equals(scope)) {
@@ -148,6 +159,7 @@ public class ContextImpl extends Context
     else entitySession.put(name, val);
   }
 
+  @Override
   public Object getSessionAttribute(String name, String scope) {
     if (Context.SCOPE_ENTITY.equals(scope)) {
       if (entitySession == null)
@@ -166,6 +178,7 @@ public class ContextImpl extends Context
     return null;
   }
 
+  @Override
   public Context getParentContext() {
     return parent;
   }
@@ -187,15 +200,18 @@ public class ContextImpl extends Context
   }
 
 
+  @Override
   public SolrCore getSolrCore() {
     return dataImporter == null ? null : dataImporter.getCore();
   }
 
 
+  @Override
   public Map<String, Object> getStats() {
     return docBuilder != null ? docBuilder.importStatistics.getStatsSnapshot() : Collections.<String, Object>emptyMap();
   }
 
+  @Override
   public String getScript() {
     if(dataImporter != null) {
       DataConfig.Script script = dataImporter.getConfig().script;
@@ -204,6 +220,7 @@ public class ContextImpl extends Context
     return null;
   }
 
+  @Override
   public String getScriptLanguage() {
     if (dataImporter != null) {
       DataConfig.Script script = dataImporter.getConfig().script;
@@ -212,12 +229,14 @@ public class ContextImpl extends Context
     return null;
   }
 
+  @Override
   public void deleteDoc(String id) {
     if(docBuilder != null){
       docBuilder.writer.deleteDoc(id);
     }
   }
 
+  @Override
   public void deleteDocByQuery(String query) {
     if(docBuilder != null){
       docBuilder.writer.deleteByQuery(query);
@@ -227,10 +246,12 @@ public class ContextImpl extends Context
   DocBuilder getDocBuilder(){
     return docBuilder;
   }
+  @Override
   public Object resolve(String var) {
     return resolver.resolve(var);
   }
 
+  @Override
   public String replaceTokens(String template) {
     return resolver.replaceTokens(template);
   }

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java Wed Feb  9 01:03:49 2011
@@ -214,6 +214,7 @@ public class DataConfig {
 
 
     public Map<String, String> allAttributes = new HashMap<String, String>() {
+      @Override
       public String put(String key, String value) {
         if (super.containsKey(key))
           return super.get(key);

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java Wed Feb  9 01:03:49 2011
@@ -423,6 +423,7 @@ public class DataImporter {
   }
 
   static final ThreadLocal<AtomicLong> QUERY_COUNT = new ThreadLocal<AtomicLong>() {
+    @Override
     protected AtomicLong initialValue() {
       return new AtomicLong();
     }

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java Wed Feb  9 01:03:49 2011
@@ -45,6 +45,7 @@ public class DateFormatTransformer exten
   private static final Logger LOG = LoggerFactory
           .getLogger(DateFormatTransformer.class);
 
+  @Override
   @SuppressWarnings("unchecked")
   public Object transformRow(Map<String, Object> aRow, Context context) {
 

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java Wed Feb  9 01:03:49 2011
@@ -60,6 +60,7 @@ class DebugLogger {
     output = new NamedList();
     debugStack = new Stack<DebugInfo>() {
 
+      @Override
       public DebugInfo pop() {
         if (size() == 1)
           throw new DataImportHandlerException(
@@ -169,14 +170,17 @@ class DebugLogger {
 
   DataSource wrapDs(final DataSource ds) {
     return new DataSource() {
+      @Override
       public void init(Context context, Properties initProps) {
         ds.init(context, initProps);
       }
 
+      @Override
       public void close() {
         ds.close();
       }
 
+      @Override
       public Object getData(String query) {
         writer.log(SolrWriter.ENTITY_META, "query", query);
         long start = System.currentTimeMillis();
@@ -203,6 +207,7 @@ class DebugLogger {
 
   Transformer wrapTransformer(final Transformer t) {
     return new Transformer() {
+      @Override
       public Object transformRow(Map<String, Object> row, Context context) {
         writer.log(SolrWriter.PRE_TRANSFORMER_ROW, null, row);
         String tName = getTransformerName(t);

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java Wed Feb  9 01:03:49 2011
@@ -139,6 +139,7 @@ public class DocBuilder {
     document = dataImporter.getConfig().document;
     final AtomicLong startTime = new AtomicLong(System.currentTimeMillis());
     statusMessages.put(TIME_ELAPSED, new Object() {
+      @Override
       public String toString() {
         return getTimeElapsedSince(startTime.get());
       }

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java Wed Feb  9 01:03:49 2011
@@ -49,6 +49,7 @@ public class EntityProcessorBase extends
   protected String onError = ABORT;
 
 
+  @Override
   public void init(Context context) {
     rowIterator = null;
     this.context = context;
@@ -86,14 +87,17 @@ public class EntityProcessorBase extends
     }
   }
 
+  @Override
   public Map<String, Object> nextModifiedRowKey() {
     return null;
   }
 
+  @Override
   public Map<String, Object> nextDeletedRowKey() {
     return null;
   }
 
+  @Override
   public Map<String, Object> nextModifiedParentRowKey() {
     return null;
   }
@@ -105,11 +109,13 @@ public class EntityProcessorBase extends
    * @return a row where the key is the name of the field and value can be any Object or a Collection of objects. Return
    *         null to signal end of rows
    */
+  @Override
   public Map<String, Object> nextRow() {
     return null;// do not do anything
   }
 
 
+  @Override
   public void destroy() {
     /*no op*/
   }

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java Wed Feb  9 01:03:49 2011
@@ -54,6 +54,7 @@ public class EntityProcessorWrapper exte
     this.docBuilder = docBuilder;
   }
 
+  @Override
   public void init(Context context) {
     rowcache = null;
     this.context = context;
@@ -79,6 +80,7 @@ public class EntityProcessorWrapper exte
 
     String[] transArr = transClasses.split(",");
     transformers = new ArrayList<Transformer>() {
+      @Override
       public boolean add(Transformer transformer) {
         if (docBuilder != null && docBuilder.verboseDebug) {
           transformer = docBuilder.writer.getDebugLogger().wrapTransformer(transformer);
@@ -135,6 +137,7 @@ public class EntityProcessorWrapper exte
       o = clazz.newInstance();
     }
 
+    @Override
     public Object transformRow(Map<String, Object> aRow, Context context) {
       try {
         return meth.invoke(o, aRow);
@@ -223,6 +226,7 @@ public class EntityProcessorWrapper exte
             && Boolean.parseBoolean(oMap.get("$stopTransform").toString());
   }
 
+  @Override
   public Map<String, Object> nextRow() {
     if (rowcache != null) {
       return getFromRowCache();
@@ -252,6 +256,7 @@ public class EntityProcessorWrapper exte
     }
   }
 
+  @Override
   public Map<String, Object> nextModifiedRowKey() {
     Map<String, Object> row = delegate.nextModifiedRowKey();
     row = applyTransformer(row);
@@ -259,6 +264,7 @@ public class EntityProcessorWrapper exte
     return row;
   }
 
+  @Override
   public Map<String, Object> nextDeletedRowKey() {
     Map<String, Object> row = delegate.nextDeletedRowKey();
     row = applyTransformer(row);
@@ -266,10 +272,12 @@ public class EntityProcessorWrapper exte
     return row;
   }
 
+  @Override
   public Map<String, Object> nextModifiedParentRowKey() {
     return delegate.nextModifiedParentRowKey();
   }
 
+  @Override
   public void destroy() {
     delegate.destroy();
   }

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java Wed Feb  9 01:03:49 2011
@@ -66,6 +66,7 @@ public class EvaluatorBag {
    */
   public static Evaluator getSqlEscapingEvaluator() {
     return new Evaluator() {
+      @Override
       public String evaluate(String expression, Context context) {
         List l = parseParams(expression, context.getVariableResolver());
         if (l.size() != 1) {
@@ -90,6 +91,7 @@ public class EvaluatorBag {
    */
   public static Evaluator getSolrQueryEscapingEvaluator() {
     return new Evaluator() {
+      @Override
       public String evaluate(String expression, Context context) {
         List l = parseParams(expression, context.getVariableResolver());
         if (l.size() != 1) {
@@ -109,6 +111,7 @@ public class EvaluatorBag {
    */
   public static Evaluator getUrlEvaluator() {
     return new Evaluator() {
+      @Override
       public String evaluate(String expression, Context context) {
         List l = parseParams(expression, context.getVariableResolver());
         if (l.size() != 1) {
@@ -138,6 +141,7 @@ public class EvaluatorBag {
    */
   public static Evaluator getDateFormatEvaluator() {
     return new Evaluator() {
+      @Override
       public String evaluate(String expression, Context context) {
         List l = parseParams(expression, context.getVariableResolver());
         if (l.size() != 2) {
@@ -288,6 +292,7 @@ public class EvaluatorBag {
 
     }
 
+    @Override
     public String toString() {
       Object o = vr.resolve(varName);
       return o == null ? null : o.toString();

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java Wed Feb  9 01:03:49 2011
@@ -52,6 +52,7 @@ public class FieldReaderDataSource exten
   private String encoding;
   private EntityProcessorWrapper entityProcessor;
 
+  @Override
   public void init(Context context, Properties initProps) {
     dataField = context.getEntityAttribute("dataField");
     encoding = context.getEntityAttribute("encoding");
@@ -59,6 +60,7 @@ public class FieldReaderDataSource exten
     /*no op*/
   }
 
+  @Override
   public Reader getData(String query) {
     Object o = entityProcessor.getVariableResolver().resolve(dataField);
     if (o == null) {
@@ -111,6 +113,7 @@ public class FieldReaderDataSource exten
     }
   }
 
+  @Override
   public void close() {
 
   }

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java Wed Feb  9 01:03:49 2011
@@ -52,12 +52,14 @@ public class FieldStreamDataSource exten
   protected String dataField;
   private EntityProcessorWrapper wrapper;
 
+  @Override
   public void init(Context context, Properties initProps) {
     dataField = context.getEntityAttribute("dataField");
     wrapper = (EntityProcessorWrapper) context.getEntityProcessor();
     /*no op*/
   }
 
+  @Override
   public InputStream getData(String query) {
     Object o = wrapper.getVariableResolver().resolve(dataField);
     if (o == null) {
@@ -90,6 +92,7 @@ public class FieldStreamDataSource exten
 
   }
 
+  @Override
   public void close() {
   }
 }

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java Wed Feb  9 01:03:49 2011
@@ -59,6 +59,7 @@ public class FileDataSource extends Data
 
   private static final Logger LOG = LoggerFactory.getLogger(FileDataSource.class);
 
+  @Override
   public void init(Context context, Properties initProps) {
     basePath = initProps.getProperty(BASE_PATH);
     if (initProps.get(URLDataSource.ENCODING) != null)
@@ -79,6 +80,7 @@ public class FileDataSource extends Data
    * returned Reader</b>
    * </p>
    */
+  @Override
   public Reader getData(String query) {
     File f = getFile(basePath,query);
     try {
@@ -130,6 +132,7 @@ public class FileDataSource extends Data
     }
   }
 
+  @Override
   public void close() {
 
   }

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java Wed Feb  9 01:03:49 2011
@@ -106,6 +106,7 @@ public class FileListEntityProcessor ext
 
   private Pattern fileNamePattern, excludesPattern;
 
+  @Override
   public void init(Context context) {
     super.init(context);
     fileName = context.getEntityAttribute(FILE_NAME);
@@ -195,6 +196,7 @@ public class FileListEntityProcessor ext
     return Long.parseLong(sizeStr);
   }
 
+  @Override
   public Map<String, Object> nextRow() {
     if (rowIterator != null)
       return getNext();

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java Wed Feb  9 01:03:49 2011
@@ -54,6 +54,7 @@ public class JdbcDataSource extends
 
   private int maxRows = 0;
 
+  @Override
   public void init(Context context, Properties initProps) {
     Object o = initProps.get(CONVERT_TYPE);
     if (o != null)
@@ -204,6 +205,7 @@ public class JdbcDataSource extends
     }
   }
 
+  @Override
   public Iterator<Map<String, Object>> getData(String query) {
     ResultSetIterator r = new ResultSetIterator(query);
     return r.getIterator();
@@ -370,6 +372,7 @@ public class JdbcDataSource extends
     }
   }
 
+  @Override
   protected void finalize() throws Throwable {
     try {
       if(!isClosed){
@@ -383,6 +386,7 @@ public class JdbcDataSource extends
 
   private boolean isClosed = false;
 
+  @Override
   public void close() {
     try {
       closeConnection();

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java Wed Feb  9 01:03:49 2011
@@ -64,6 +64,7 @@ public class LineEntityProcessor extends
   /**
    * Parses each of the entity attributes.
    */
+  @Override
   public void init(Context context) {
     super.init(context);
     String s;
@@ -97,6 +98,7 @@ public class LineEntityProcessor extends
    * from the url. However transformers can be used to create as 
    * many other fields as required.
    */
+  @Override
   public Map<String, Object> nextRow() {
     if (reader == null) {
       reader = new BufferedReader((Reader) context.getDataSource().getData(url));

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java Wed Feb  9 01:03:49 2011
@@ -35,6 +35,7 @@ import java.util.Map;
 public class LogTransformer extends Transformer {
   Logger LOG = LoggerFactory.getLogger(LogTransformer.class);
 
+  @Override
   public Object transformRow(Map<String, Object> row, Context ctx) {
     String expr = ctx.getEntityAttribute(LOG_TEMPLATE);
     String level = ctx.replaceTokens(ctx.getEntityAttribute(LOG_LEVEL));

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java Wed Feb  9 01:03:49 2011
@@ -45,13 +45,16 @@ public class MockDataSource extends
     cache.clear();
   }
 
+  @Override
   public void init(Context context, Properties initProps) {
   }
 
+  @Override
   public Iterator<Map<String, Object>> getData(String query) {
     return cache.get(query);
   }
 
+  @Override
   public void close() {
     cache.clear();
 

Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java Wed Feb  9 01:03:49 2011
@@ -49,6 +49,7 @@ public class NumberFormatTransformer ext
 
   private static final Pattern localeRegex = Pattern.compile("^([a-z]{2})-([A-Z]{2})$");
 
+  @Override
   @SuppressWarnings("unchecked")
   public Object transformRow(Map<String, Object> row, Context context) {
     for (Map<String, String> fld : context.getAllEntityFields()) {