You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/02/09 02:04:13 UTC
svn commit: r1068718 [12/21] - in /lucene/dev/branches/bulkpostings: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/
dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/maven/lucene/contrib/ant/
dev-tools/maven/lucene/contrib/db/bdb-j...
Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java Wed Feb 9 01:03:49 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.benchmark.byTa
import java.io.BufferedReader;
import java.io.File;
-import java.io.FileInputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.text.DateFormat;
@@ -29,8 +29,8 @@ import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Locale;
-import java.util.zip.GZIPInputStream;
+import org.apache.lucene.benchmark.byTask.feeds.TrecDocParser.ParsePathType;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.StringBuilderReader;
import org.apache.lucene.util.ThreadInterruptedException;
@@ -46,8 +46,10 @@ import org.apache.lucene.util.ThreadInte
* <li><b>docs.dir</b> - specifies the directory where the TREC files reside.
* Can be set to a relative path if "work.dir" is also specified
* (<b>default=trec</b>).
+ * <li><b>trec.doc.parser</b> - specifies the {@link TrecDocParser} class to use for
+ * parsing the TREC documents content (<b>default=TrecGov2Parser</b>).
* <li><b>html.parser</b> - specifies the {@link HTMLParser} class to use for
- * parsing the TREC documents content (<b>default=DemoHTMLParser</b>).
+ * parsing the HTML parts of the TREC documents content (<b>default=DemoHTMLParser</b>).
* <li><b>content.source.encoding</b> - if not specified, ISO-8859-1 is used.
* <li><b>content.source.excludeIteration</b> - if true, do not append iteration number to docname
* </ul>
@@ -59,22 +61,24 @@ public class TrecContentSource extends C
ParsePosition pos;
}
- private static final String DATE = "Date: ";
- private static final String DOCHDR = "<DOCHDR>";
- private static final String TERMINATING_DOCHDR = "</DOCHDR>";
- private static final String DOCNO = "<DOCNO>";
- private static final String TERMINATING_DOCNO = "</DOCNO>";
- private static final String DOC = "<DOC>";
- private static final String TERMINATING_DOC = "</DOC>";
+ public static final String DOCNO = "<DOCNO>";
+ public static final String TERMINATING_DOCNO = "</DOCNO>";
+ public static final String DOC = "<DOC>";
+ public static final String TERMINATING_DOC = "</DOC>";
- private static final String NEW_LINE = System.getProperty("line.separator");
+ /** separator between lines in the buffer */
+ public static final String NEW_LINE = System.getProperty("line.separator");
private static final String DATE_FORMATS [] = {
- "EEE, dd MMM yyyy kk:mm:ss z", // Tue, 09 Dec 2003 22:39:08 GMT
- "EEE MMM dd kk:mm:ss yyyy z", // Tue Dec 09 16:45:08 2003 EST
- "EEE, dd-MMM-':'y kk:mm:ss z", // Tue, 09 Dec 2003 22:39:08 GMT
- "EEE, dd-MMM-yyy kk:mm:ss z", // Tue, 09 Dec 2003 22:39:08 GMT
- "EEE MMM dd kk:mm:ss yyyy", // Tue Dec 09 16:45:08 2003
+ "EEE, dd MMM yyyy kk:mm:ss z", // Tue, 09 Dec 2003 22:39:08 GMT
+ "EEE MMM dd kk:mm:ss yyyy z", // Tue Dec 09 16:45:08 2003 EST
+ "EEE, dd-MMM-':'y kk:mm:ss z", // Tue, 09 Dec 2003 22:39:08 GMT
+ "EEE, dd-MMM-yyy kk:mm:ss z", // Tue, 09 Dec 2003 22:39:08 GMT
+ "EEE MMM dd kk:mm:ss yyyy", // Tue Dec 09 16:45:08 2003
+ "dd MMM yyyy", // 1 March 1994
+ "MMM dd, yyyy", // February 3, 1994
+ "yyMMdd", // 910513
+ "hhmm z.z.z. MMM dd, yyyy", // 0901 u.t.c. April 28, 1994
};
private ThreadLocal<DateFormatInfo> dateFormats = new ThreadLocal<DateFormatInfo>();
@@ -83,7 +87,7 @@ public class TrecContentSource extends C
private File dataDir = null;
private ArrayList<File> inputFiles = new ArrayList<File>();
private int nextFile = 0;
- private int rawDocSize;
+ private int rawDocSize = 0;
// Use to synchronize threads on reading from the TREC documents.
private Object lock = new Object();
@@ -92,7 +96,10 @@ public class TrecContentSource extends C
BufferedReader reader;
int iteration = 0;
HTMLParser htmlParser;
+
private boolean excludeDocnameIteration;
+ private TrecDocParser trecDocParser = new TrecGov2Parser(); // default
+ ParsePathType currPathType; // not private for tests
private DateFormatInfo getDateFormatInfo() {
DateFormatInfo dfi = dateFormats.get();
@@ -118,7 +125,7 @@ public class TrecContentSource extends C
return sb;
}
- private Reader getTrecDocReader(StringBuilder docBuffer) {
+ Reader getTrecDocReader(StringBuilder docBuffer) {
StringBuilderReader r = trecDocReader.get();
if (r == null) {
r = new StringBuilderReader(docBuffer);
@@ -129,10 +136,21 @@ public class TrecContentSource extends C
return r;
}
- // read until finding a line that starts with the specified prefix, or a terminating tag has been found.
- private void read(StringBuilder buf, String prefix, boolean collectMatchLine,
- boolean collectAll, String terminatingTag)
- throws IOException, NoMoreDataException {
+ HTMLParser getHtmlParser() {
+ return htmlParser;
+ }
+
+ /**
+ * Read until a line starting with the specified <code>lineStart</code>.
+ * @param buf buffer for collecting the data if so specified.
+ * @param lineStart line start to look for, must not be null.
+ * @param collectMatchLine whether to collect the matching line into <code>buffer</code>.
+ * @param collectAll whether to collect all lines into <code>buffer</code>.
+ * @throws IOException
+ * @throws NoMoreDataException
+ */
+ private void read(StringBuilder buf, String lineStart,
+ boolean collectMatchLine, boolean collectAll) throws IOException, NoMoreDataException {
String sep = "";
while (true) {
String line = reader.readLine();
@@ -144,20 +162,12 @@ public class TrecContentSource extends C
rawDocSize += line.length();
- if (line.startsWith(prefix)) {
+ if (lineStart!=null && line.startsWith(lineStart)) {
if (collectMatchLine) {
buf.append(sep).append(line);
sep = NEW_LINE;
}
- break;
- }
-
- if (terminatingTag != null && line.startsWith(terminatingTag)) {
- // didn't find the prefix that was asked, but the terminating
- // tag was found. set the length to 0 to signal no match was
- // found.
- buf.setLength(0);
- break;
+ return;
}
if (collectAll) {
@@ -169,7 +179,7 @@ public class TrecContentSource extends C
void openNextFile() throws NoMoreDataException, IOException {
close();
- int retries = 0;
+ currPathType = null;
while (true) {
if (nextFile >= inputFiles.size()) {
// exhausted files, start a new round, unless forever set to false.
@@ -184,13 +194,13 @@ public class TrecContentSource extends C
System.out.println("opening: " + f + " length: " + f.length());
}
try {
- GZIPInputStream zis = new GZIPInputStream(new FileInputStream(f), BUFFER_SIZE);
- reader = new BufferedReader(new InputStreamReader(zis, encoding), BUFFER_SIZE);
+ InputStream inputStream = getInputStream(f); // support either gzip, bzip2, or regular text file, by extension
+ reader = new BufferedReader(new InputStreamReader(inputStream, encoding), BUFFER_SIZE);
+ currPathType = TrecDocParser.pathType(f);
return;
} catch (Exception e) {
- retries++;
- if (retries < 20 && verbose) {
- System.out.println("Skipping 'bad' file " + f.getAbsolutePath() + " #retries=" + retries);
+ if (verbose) {
+ System.out.println("Skipping 'bad' file " + f.getAbsolutePath()+" due to "+e.getMessage());
continue;
}
throw new NoMoreDataException();
@@ -198,7 +208,7 @@ public class TrecContentSource extends C
}
}
- Date parseDate(String dateStr) {
+ public Date parseDate(String dateStr) {
dateStr = dateStr.trim();
DateFormatInfo dfi = getDateFormatInfo();
for (int i = 0; i < dfi.dfs.length; i++) {
@@ -237,70 +247,47 @@ public class TrecContentSource extends C
@Override
public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
- String dateStr = null, name = null;
- Reader r = null;
+ String name = null;
+ StringBuilder docBuf = getDocBuffer();
+ ParsePathType parsedPathType;
+
// protect reading from the TREC files by multiple threads. The rest of the
- // method, i.e., parsing the content and returning the DocData can run
- // unprotected.
+ // method, i.e., parsing the content and returning the DocData can run unprotected.
synchronized (lock) {
if (reader == null) {
openNextFile();
}
-
- StringBuilder docBuf = getDocBuffer();
- // 1. skip until doc start
+ // 1. skip until doc start - required for all TREC formats
docBuf.setLength(0);
- read(docBuf, DOC, false, false, null);
-
- // 2. name
+ read(docBuf, DOC, false, false);
+
+ // save the current path type for passing to trecDocParser after the sync
+ // block, in case another thread opens another file in between.
+ parsedPathType = currPathType;
+
+ // 2. name - required for all TREC formats
docBuf.setLength(0);
- read(docBuf, DOCNO, true, false, null);
+ read(docBuf, DOCNO, true, false);
name = docBuf.substring(DOCNO.length(), docBuf.indexOf(TERMINATING_DOCNO,
- DOCNO.length()));
- if (!excludeDocnameIteration)
+ DOCNO.length())).trim();
+
+ if (!excludeDocnameIteration) {
name = name + "_" + iteration;
-
- // 3. skip until doc header
- docBuf.setLength(0);
- read(docBuf, DOCHDR, false, false, null);
-
- boolean findTerminatingDocHdr = false;
-
- // 4. date - look for the date only until /DOCHDR
- docBuf.setLength(0);
- read(docBuf, DATE, true, false, TERMINATING_DOCHDR);
- if (docBuf.length() != 0) {
- // Date found.
- dateStr = docBuf.substring(DATE.length());
- findTerminatingDocHdr = true;
}
- // 5. skip until end of doc header
- if (findTerminatingDocHdr) {
- docBuf.setLength(0);
- read(docBuf, TERMINATING_DOCHDR, false, false, null);
- }
-
- // 6. collect until end of doc
+ // 3. read all until end of doc
docBuf.setLength(0);
- read(docBuf, TERMINATING_DOC, false, true, null);
-
- // 7. Set up a Reader over the read content
- r = getTrecDocReader(docBuf);
- // Resetting the thread's reader means it will reuse the instance
- // allocated as well as re-read from docBuf.
- r.reset();
-
- // count char length of parsed html text (larger than the plain doc body text).
- addBytes(docBuf.length());
+ read(docBuf, TERMINATING_DOC, false, true);
}
+
+ // count char length of text to be parsed (may be larger than the resulting plain doc body text).
+ addBytes(docBuf.length());
// This code segment relies on HtmlParser being thread safe. When we get
// here, everything else is already private to that thread, so we're safe.
- Date date = dateStr != null ? parseDate(dateStr) : null;
try {
- docData = htmlParser.parse(docData, name, date, r, null);
+ docData = trecDocParser.parse(docData, name, this, docBuf, parsedPathType);
addDoc();
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
@@ -322,27 +309,40 @@ public class TrecContentSource extends C
@Override
public void setConfig(Config config) {
super.setConfig(config);
+ // dirs
File workDir = new File(config.get("work.dir", "work"));
String d = config.get("docs.dir", "trec");
dataDir = new File(d);
if (!dataDir.isAbsolute()) {
dataDir = new File(workDir, d);
}
+ // files
collectFiles(dataDir, inputFiles);
if (inputFiles.size() == 0) {
throw new IllegalArgumentException("No files in dataDir: " + dataDir);
}
+ // trec doc parser
+ try {
+ String trecDocParserClassName = config.get("trec.doc.parser", "org.apache.lucene.benchmark.byTask.feeds.TrecGov2Parser");
+ trecDocParser = Class.forName(trecDocParserClassName).asSubclass(TrecDocParser.class).newInstance();
+ } catch (Exception e) {
+ // Should not get here. Throw runtime exception.
+ throw new RuntimeException(e);
+ }
+ // html parser
try {
- String parserClassName = config.get("html.parser",
+ String htmlParserClassName = config.get("html.parser",
"org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser");
- htmlParser = Class.forName(parserClassName).asSubclass(HTMLParser.class).newInstance();
+ htmlParser = Class.forName(htmlParserClassName).asSubclass(HTMLParser.class).newInstance();
} catch (Exception e) {
// Should not get here. Throw runtime exception.
throw new RuntimeException(e);
}
+ // encoding
if (encoding == null) {
encoding = "ISO-8859-1";
}
+ // iteration exclusion in doc name
excludeDocnameIteration = config.get("content.source.excludeIteration", false);
}
Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java Wed Feb 9 01:03:49 2011
@@ -59,7 +59,7 @@ public class NearRealtimeReaderTask exte
}
long t = System.currentTimeMillis();
- IndexReader r = IndexReader.open(w);
+ IndexReader r = IndexReader.open(w, true);
runData.setIndexReader(r);
// Transfer our reference to runData
r.decRef();
Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java Wed Feb 9 01:03:49 2011
@@ -158,8 +158,10 @@ public class StringBuilderReader extends
synchronized (lock) {
this.sb = sb;
length = sb.length();
+ next = mark = 0;
}
}
+
@Override
public long skip(long ns) throws IOException {
synchronized (lock) {
Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java Wed Feb 9 01:03:49 2011
@@ -28,7 +28,7 @@ import org.apache.lucene.search.TopDocs;
/**
* Create a log ready for submission.
* Extend this class and override
- * {@link #report(QualityQuery, TopDocs, String, Searcher)}
+ * {@link #report(QualityQuery, TopDocs, String, IndexSearcher)}
* to create different reports.
*/
public class SubmissionReport {
Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java Wed Feb 9 01:03:49 2011
@@ -29,146 +29,119 @@ import java.util.regex.Pattern;
/**
* Split the Reuters SGML documents into Simple Text files containing: Title, Date, Dateline, Body
*/
-public class ExtractReuters
-{
- private File reutersDir;
- private File outputDir;
- private static final String LINE_SEPARATOR = System.getProperty("line.separator");
-
- public ExtractReuters(File reutersDir, File outputDir)
- {
- this.reutersDir = reutersDir;
- this.outputDir = outputDir;
- System.out.println("Deleting all files in " + outputDir);
- File [] files = outputDir.listFiles();
- for (int i = 0; i < files.length; i++)
- {
- files[i].delete();
- }
-
+public class ExtractReuters {
+ private File reutersDir;
+ private File outputDir;
+ private static final String LINE_SEPARATOR = System.getProperty("line.separator");
+
+ public ExtractReuters(File reutersDir, File outputDir) {
+ this.reutersDir = reutersDir;
+ this.outputDir = outputDir;
+ System.out.println("Deleting all files in " + outputDir);
+ for (File f : outputDir.listFiles()) {
+ f.delete();
}
+ }
- public void extract()
- {
- File [] sgmFiles = reutersDir.listFiles(new FileFilter()
- {
- public boolean accept(File file)
- {
- return file.getName().endsWith(".sgm");
- }
- });
- if (sgmFiles != null && sgmFiles.length > 0)
- {
- for (int i = 0; i < sgmFiles.length; i++)
- {
- File sgmFile = sgmFiles[i];
- extractFile(sgmFile);
- }
- }
- else
- {
- System.err.println("No .sgm files in " + reutersDir);
- }
+ public void extract() {
+ File[] sgmFiles = reutersDir.listFiles(new FileFilter() {
+ public boolean accept(File file) {
+ return file.getName().endsWith(".sgm");
+ }
+ });
+ if (sgmFiles != null && sgmFiles.length > 0) {
+ for (File sgmFile : sgmFiles) {
+ extractFile(sgmFile);
+ }
+ } else {
+ System.err.println("No .sgm files in " + reutersDir);
}
+ }
- Pattern EXTRACTION_PATTERN = Pattern.compile("<TITLE>(.*?)</TITLE>|<DATE>(.*?)</DATE>|<BODY>(.*?)</BODY>");
-
- private static String[] META_CHARS
- = {"&", "<", ">", "\"", "'"};
+ Pattern EXTRACTION_PATTERN = Pattern
+ .compile("<TITLE>(.*?)</TITLE>|<DATE>(.*?)</DATE>|<BODY>(.*?)</BODY>");
- private static String[] META_CHARS_SERIALIZATIONS
- = {"&", "<", ">", """, "'"};
+ private static String[] META_CHARS = { "&", "<", ">", "\"", "'" };
- /**
- * Override if you wish to change what is extracted
- *
- * @param sgmFile
- */
- protected void extractFile(File sgmFile)
- {
- try
- {
- BufferedReader reader = new BufferedReader(new FileReader(sgmFile));
-
- StringBuilder buffer = new StringBuilder(1024);
- StringBuilder outBuffer = new StringBuilder(1024);
-
- String line = null;
- int docNumber = 0;
- while ((line = reader.readLine()) != null)
- {
- //when we see a closing reuters tag, flush the file
-
- if (line.indexOf("</REUTERS") == -1) {
- //Replace the SGM escape sequences
-
- buffer.append(line).append(' ');//accumulate the strings for now, then apply regular expression to get the pieces,
- }
- else
- {
- //Extract the relevant pieces and write to a file in the output dir
- Matcher matcher = EXTRACTION_PATTERN.matcher(buffer);
- while (matcher.find())
- {
- for (int i = 1; i <= matcher.groupCount(); i++)
- {
- if (matcher.group(i) != null)
- {
- outBuffer.append(matcher.group(i));
- }
- }
- outBuffer.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
- }
- String out = outBuffer.toString();
- for (int i = 0; i < META_CHARS_SERIALIZATIONS.length; i++)
- {
- out = out.replaceAll(META_CHARS_SERIALIZATIONS[i], META_CHARS[i]);
- }
- File outFile = new File(outputDir, sgmFile.getName() + "-" + (docNumber++) + ".txt");
- //System.out.println("Writing " + outFile);
- FileWriter writer = new FileWriter(outFile);
- writer.write(out);
- writer.close();
- outBuffer.setLength(0);
- buffer.setLength(0);
- }
+ private static String[] META_CHARS_SERIALIZATIONS = { "&amp;", "&lt;",
+ "&gt;", "&quot;", "&apos;" };
+
+ /**
+ * Override if you wish to change what is extracted
+ *
+ * @param sgmFile
+ */
+ protected void extractFile(File sgmFile) {
+ try {
+ BufferedReader reader = new BufferedReader(new FileReader(sgmFile));
+
+ StringBuilder buffer = new StringBuilder(1024);
+ StringBuilder outBuffer = new StringBuilder(1024);
+
+ String line = null;
+ int docNumber = 0;
+ while ((line = reader.readLine()) != null) {
+ // when we see a closing reuters tag, flush the file
+
+ if (line.indexOf("</REUTERS") == -1) {
+ // Replace the SGM escape sequences
+
+ buffer.append(line).append(' ');// accumulate the strings for now,
+ // then apply regular expression to
+ // get the pieces,
+ } else {
+ // Extract the relevant pieces and write to a file in the output dir
+ Matcher matcher = EXTRACTION_PATTERN.matcher(buffer);
+ while (matcher.find()) {
+ for (int i = 1; i <= matcher.groupCount(); i++) {
+ if (matcher.group(i) != null) {
+ outBuffer.append(matcher.group(i));
+ }
}
- reader.close();
- }
-
- catch (
- IOException e
- )
-
- {
- throw new RuntimeException(e);
- }
+ outBuffer.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
+ }
+ String out = outBuffer.toString();
+ for (int i = 0; i < META_CHARS_SERIALIZATIONS.length; i++) {
+ out = out.replaceAll(META_CHARS_SERIALIZATIONS[i], META_CHARS[i]);
+ }
+ File outFile = new File(outputDir, sgmFile.getName() + "-"
+ + (docNumber++) + ".txt");
+ // System.out.println("Writing " + outFile);
+ FileWriter writer = new FileWriter(outFile);
+ writer.write(out);
+ writer.close();
+ outBuffer.setLength(0);
+ buffer.setLength(0);
+ }
+ }
+ reader.close();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
}
+ }
-
- public static void main(String[] args)
- {
- if (args.length != 2)
- {
- printUsage();
- }
- File reutersDir = new File(args[0]);
-
- if (reutersDir.exists())
- {
- File outputDir = new File(args[1]);
- outputDir.mkdirs();
- ExtractReuters extractor = new ExtractReuters(reutersDir, outputDir);
- extractor.extract();
- }
- else
- {
- printUsage();
- }
+ public static void main(String[] args) {
+ if (args.length != 2) {
+ printUsage();
}
-
- private static void printUsage()
- {
- System.err.println("Usage: java -cp <...> org.apache.lucene.benchmark.utils.ExtractReuters <Path to Reuters SGM files> <Output Path>");
+ File reutersDir = new File(args[0]);
+ if (!reutersDir.exists()) {
+ printUsage();
+ return;
}
+
+ // First, extract to a tmp directory and only if everything succeeds, rename
+ // to output directory.
+ File outputDir = new File(args[1] + "-tmp");
+ outputDir.mkdirs();
+ ExtractReuters extractor = new ExtractReuters(reutersDir, outputDir);
+ extractor.extract();
+ // Now rename to requested output dir
+ outputDir.renameTo(new File(args[1]));
+ }
+
+ private static void printUsage() {
+ System.err.println("Usage: java -cp <...> org.apache.lucene.benchmark.utils.ExtractReuters <Path to Reuters SGM files> <Output Path>");
+ }
+
}
Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/TrecContentSourceTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/TrecContentSourceTest.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/TrecContentSourceTest.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/TrecContentSourceTest.java Wed Feb 9 01:03:49 2011
@@ -18,14 +18,20 @@ package org.apache.lucene.benchmark.byTa
*/
import java.io.BufferedReader;
+import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.text.ParseException;
+import java.util.Arrays;
import java.util.Date;
+import java.util.HashSet;
+import java.util.Properties;
+import org.apache.lucene.benchmark.byTask.feeds.TrecDocParser.ParsePathType;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
public class TrecContentSourceTest extends LuceneTestCase {
@@ -329,5 +335,62 @@ public class TrecContentSourceTest exten
// Don't test that NoMoreDataException is thrown, since the forever flag is
// turned on.
}
+
+ /**
+ * Open a trec content source over a directory with files of all trec path types and all
+ * supported formats - bzip, gzip, txt.
+ */
+ public void testTrecFeedDirAllTypes() throws Exception {
+ File dataDir = _TestUtil.getTempDir("trecFeedAllTypes");
+ _TestUtil.unzip(getDataFile("trecdocs.zip"), dataDir);
+ TrecContentSource tcs = new TrecContentSource();
+ Properties props = new Properties();
+ props.setProperty("print.props", "false");
+ props.setProperty("content.source.verbose", "false");
+ props.setProperty("content.source.excludeIteration", "true");
+ props.setProperty("doc.maker.forever", "false");
+ props.setProperty("docs.dir", dataDir.getCanonicalPath().replace('\\','/'));
+ props.setProperty("trec.doc.parser", TrecParserByPath.class.getName());
+ props.setProperty("content.source.forever", "false");
+ tcs.setConfig(new Config(props));
+ tcs.resetInputs();
+ DocData dd = new DocData();
+ int n = 0;
+ boolean gotExpectedException = false;
+ HashSet<ParsePathType> unseenTypes = new HashSet<ParsePathType>(Arrays.asList(ParsePathType.values()));
+ try {
+ while (n<100) { // arbitrary limit to prevent looping forever in case of test failure
+ dd = tcs.getNextDocData(dd);
+ ++n;
+ assertNotNull("doc data "+n+" should not be null!", dd);
+ unseenTypes.remove(tcs.currPathType);
+ switch(tcs.currPathType) {
+ case GOV2:
+ assertDocData(dd, "TEST-000", "TEST-000 title", "TEST-000 text", tcs.parseDate("Sun, 11 Jan 2009 08:00:00 GMT"));
+ break;
+ case FBIS:
+ assertDocData(dd, "TEST-001", "TEST-001 Title", "TEST-001 text", tcs.parseDate("1 January 1991"));
+ break;
+ case FR94:
+ // no title extraction in this source for now
+ assertDocData(dd, "TEST-002", null, "DEPARTMENT OF SOMETHING", tcs.parseDate("February 3, 1994"));
+ break;
+ case FT:
+ assertDocData(dd, "TEST-003", "Test-003 title", "Some pub text", tcs.parseDate("980424"));
+ break;
+ case LATIMES:
+ assertDocData(dd, "TEST-004", "Test-004 Title", "Some paragraph", tcs.parseDate("January 17, 1997, Sunday"));
+ break;
+ default:
+ assertTrue("Should never get here!", false);
+ }
+ }
+ } catch (NoMoreDataException e) {
+ gotExpectedException = true;
+ }
+ assertTrue("Should have gotten NoMoreDataException!", gotExpectedException);
+ assertEquals("Wrong numbre of documents created by osurce!",5,n);
+ assertTrue("Did not see all types!",unseenTypes.isEmpty());
+ }
}
Modified: lucene/dev/branches/bulkpostings/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/CHANGES.txt?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/solr/CHANGES.txt Wed Feb 9 01:03:49 2011
@@ -53,6 +53,15 @@ Upgrading from Solr 3.1-dev
legacy behavior should set a default value for the 'mm' param in
their solrconfig.xml file.
+* In previous releases, sorting on fields that are "multiValued"
+ (either by explicit declaration in schema.xml or by implicit behavior
+ because the "version" attribute on the schema was less than 1.2) did
+ not generally work, but it would sometimes silently act as if it
+ succeeded and order the docs arbitrarily. Solr will now fail on any
+ attempt to sort on a multivalued field.
+
+
+
Detailed Change List
----------------------
@@ -106,8 +115,6 @@ New Features
Adding a parameter NOW=<time_in_ms> to the request will override the
current time. (Peter Sturge, yonik)
-* SOLR-2325: Allow tagging and exlcusion of main query for faceting. (yonik)
-
Optimizations
----------------------
@@ -163,6 +170,18 @@ Bug Fixes
* SOLR-482: Provide more exception handling in CSVLoader (gsingers)
+* SOLR-2320: Fixed ReplicationHandler detail reporting for masters
+ (hossman)
+
+* SOLR-2085: Improve SolrJ behavior when FacetComponent comes before
+ QueryComponent (Tomas Salfischberger via hossman)
+
+* SOLR-1940: Fix SolrDispatchFilter behavior when Content-Type is
+ unknown (Lance Norskog and hossman)
+
+* SOLR-2339: Fix sorting to explicitly generate an error if you
+ attempt to sort on a multiValued field. (hossman)
+
Other Changes
----------------------
@@ -184,6 +203,10 @@ Other Changes
using Generics where applicable in method/object declatations, and
adding @SuppressWarnings("unchecked") when appropriate (hossman)
+* SOLR-2350: Since Solr no longer requires XML files to be in UTF-8
+ (see SOLR-96) SimplePostTool (aka: post.jar) has been improved to
+ work with files of any mime-type or charset. (hossman)
+
Documentation
----------------------
@@ -395,7 +418,6 @@ New Features
http://wiki.apache.org/solr/SpatialSearch and the example. Refactored some items in Lucene spatial.
Removed SpatialTileField as the underlying CartesianTier is broken beyond repair and is going to be moved. (gsingers)
-
* SOLR-2128: Full parameter substitution for function queries.
Example: q=add($v1,$v2)&v1=mul(popularity,5)&v2=20.0
(yonik)
@@ -416,6 +438,15 @@ New Features
* SOLR-2129: Added a Solr module for dynamic metadata extraction/indexing with Apache UIMA.
See contrib/uima/README.txt for more information. (Tommaso Teofili via rmuir)
+* SOLR-2325: Allow tagging and exclusion of main query for faceting. (yonik)
+
+* SOLR-2263: Add ability for RawResponseWriter to stream binary files as well as
+ text files. (Eric Pugh via yonik)
+
+* SOLR-860: Add debug output for MoreLikeThis. (koji)
+
+* SOLR-1057: Add PathHierarchyTokenizerFactory. (ryan, koji)
+
Optimizations
----------------------
@@ -513,7 +544,8 @@ Bug Fixes
* SOLR-1711: SolrJ - StreamingUpdateSolrServer had a race condition that
could halt the streaming of documents. The original patch to fix this
(never officially released) introduced another hanging bug due to
- connections not being released. (Attila Babo, Erik Hetzner via yonik)
+ connections not being released.
+ (Attila Babo, Erik Hetzner, Johannes Tuchscherer via yonik)
* SOLR-1748, SOLR-1747, SOLR-1746, SOLR-1745, SOLR-1744: Streams and Readers
retrieved from ContentStreams are not closed in various places, resulting
@@ -637,6 +669,15 @@ Bug Fixes
* SOLR-2261: fix velocity template layout.vm that referred to an older
version of jquery. (Eric Pugh via rmuir)
+* SOLR-1983: snappuller fails when modifiedConfFiles is not empty and
+ full copy of index is needed. (Alexander Kanarsky via yonik)
+
+* SOLR-2156: SnapPuller fails to clean Old Index Directories on Full Copy
+ (Jayendra Patil via yonik)
+
+* SOLR-96: Fix XML parsing in XMLUpdateRequestHandler and
+ DocumentAnalysisRequestHandler to respect charset from XML file and only
+ use HTTP header's "Content-Type" as a "hint". (Uwe Schindler)
Other Changes
----------------------
@@ -730,6 +771,12 @@ Other Changes
* SOLR-2213: Upgrade to jQuery 1.4.3 (Erick Erickson via ryan)
+* SOLR-1826: Add unit tests for highlighting with termOffsets=true
+ and overlapping tokens. (Stefan Oestreicher via rmuir)
+
+* SOLR-2340: Add version infos to message in JavaBinCodec when throwing
+ exception. (koji)
+
Build
----------------------
Modified: lucene/dev/branches/bulkpostings/solr/NOTICE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/NOTICE.txt?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/NOTICE.txt (original)
+++ lucene/dev/branches/bulkpostings/solr/NOTICE.txt Wed Feb 9 01:03:49 2011
@@ -156,7 +156,6 @@ This product includes software developed
See http://project.carrot2.org/
-
=========================================================================
== Guava Notice ==
=========================================================================
@@ -168,6 +167,16 @@ This product includes software developed
See http://code.google.com/p/guava-libraries/
=========================================================================
+== Prettify Notice ==
+=========================================================================
+
+Copyright ???? Google, Inc.
+
+This product includes software developed by the Google Prettify project.
+
+See http://code.google.com/p/google-code-prettify/
+
+=========================================================================
== Jackson Notice ==
=========================================================================
Copyright ????
Modified: lucene/dev/branches/bulkpostings/solr/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/build.xml?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/build.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/build.xml Wed Feb 9 01:03:49 2011
@@ -18,6 +18,8 @@
-->
<project name="solr" default="usage" basedir="." xmlns:artifact="antlib:org.apache.maven.artifact.ant">
+
+ <property name="solr-path" value="." />
<import file="common-build.xml"/>
@@ -340,7 +342,7 @@
<path refid="compile.classpath.solrj" />
<pathelement location="${dest}/solr"/>
<pathelement location="${dest}/solrj"/> <!-- include solrj -->
- <pathelement location="${common-solr.dir}/../lucene/build/classes/test" /> <!-- include some lucene test code -->
+ <pathelement location="${common-solr.dir}/../lucene/build/classes/test-framework" /> <!-- include some lucene test code -->
</path>
<path id="test.run.classpath">
@@ -348,7 +350,7 @@
<pathelement location="${dest}/tests"/>
<!-- include the solrj classpath and jetty files included in example -->
<path refid="compile.classpath.solrj" />
- <pathelement location="${common-solr.dir}/../lucene/build/classes/test" /> <!-- include some lucene test code -->
+ <pathelement location="${common-solr.dir}/../lucene/build/classes/test-framework" /> <!-- include some lucene test code -->
<pathelement path="${java.class.path}"/>
</path>
@@ -956,6 +958,8 @@
description="runs the tasks over src/java excluding the license directory">
<rat:report xmlns:rat="antlib:org.apache.rat.anttasks">
<fileset dir="src/java"/>
+ <fileset dir="src/test"/>
+ <fileset dir="src/webapp"/>
<fileset dir="src/common"/>
<fileset dir="src/solrj"/>
<fileset dir="client">
@@ -963,6 +967,8 @@
</fileset>
<fileset dir="contrib/dataimporthandler/src/main/java"/>
<fileset dir="contrib/dataimporthandler/src/test/java"/>
+ <fileset dir="contrib/dataimporthandler/src/extras/main/java"/>
+ <fileset dir="contrib/dataimporthandler/src/extras/test/java"/>
<fileset dir="contrib/clustering/src/main/java"/>
<fileset dir="contrib/clustering/src/test/java"/>
<fileset dir="contrib/extraction/src/main/java"/>
Modified: lucene/dev/branches/bulkpostings/solr/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/common-build.xml?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/common-build.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/common-build.xml Wed Feb 9 01:03:49 2011
@@ -33,6 +33,9 @@
<property name="name" value="${ant.project.name}"/>
+ <property name="dev-tools.dir" value="${solr-path}/../dev-tools"/>
+ <property name="prettify.dir" value="${dev-tools.dir}/prettify"/>
+
<tstamp>
<format property="year" pattern="yyyy"/>
<format property="DSTAMP" pattern="yyyy-MM-dd"/>
@@ -344,6 +347,9 @@
<attribute name="destdir"/>
<attribute name="title" default="${Name} ${version} API (${specversion})"/>
<sequential>
+ <copy todir="@{destdir}/../prettify" overwrite="false">
+ <fileset dir="${prettify.dir}"/>
+ </copy>
<javadoc
packagenames="org.apache.solr.*"
failonerror="true"
@@ -357,6 +363,7 @@
link="${javadoc.link.java}"
windowtitle="${Name} ${version} API"
doctitle="@{title}"
+ stylesheetfile="@{destdir}/../prettify/stylesheet+prettify.css"
bottom="Copyright &amp;copy; ${year} Apache Software Foundation. All Rights Reserved.">
<tag name="todo" description="To Do:"/>
<tag name="uml.property" description="UML Property:"/>
@@ -368,6 +375,10 @@
<link href="${javadoc.link.java}"/>
<link href="${javadoc.link.junit}"/>
<link href="${javadoc.link.lucene}"/>
+ <header><![CDATA[
+ <script src="{@docRoot}/../prettify/prettify.js" type="text/javascript"></script>
+ <script language="JavaScript">window.onload=function(){windowTitle();prettyPrint();}</script>
+ ]]></header>
<sources />
Modified: lucene/dev/branches/bulkpostings/solr/contrib/analysis-extras/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/analysis-extras/build.xml?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/analysis-extras/build.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/analysis-extras/build.xml Wed Feb 9 01:03:49 2011
@@ -73,7 +73,7 @@
<pathelement path="${dest}/test-classes"/>
<pathelement path="${java.class.path}"/>
<pathelement location="${common-solr.dir}/build/tests"/> <!-- include solr test code -->
- <pathelement location="${common-solr.dir}/../lucene/build/classes/test" /> <!-- include some lucene test code -->
+ <pathelement location="${common-solr.dir}/../lucene/build/classes/test-framework" /> <!-- include some lucene test code -->
<path refid="common.classpath"/>
</path>
Modified: lucene/dev/branches/bulkpostings/solr/contrib/clustering/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/clustering/build.xml?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/clustering/build.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/clustering/build.xml Wed Feb 9 01:03:49 2011
@@ -42,7 +42,7 @@
<pathelement path="${dest}/test-classes"/>
<pathelement path="${java.class.path}"/>
<pathelement location="${common-solr.dir}/build/tests"/> <!-- include solr test code -->
- <pathelement location="${common-solr.dir}/../lucene/build/classes/test" /> <!-- include some lucene test code -->
+ <pathelement location="${common-solr.dir}/../lucene/build/classes/test-framework" /> <!-- include some lucene test code -->
<path refid="common.classpath"/>
<!-- DistributedClusteringComponentTest uses Jetty -->
<fileset dir="${solr-path}/example/lib">
Modified: lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java Wed Feb 9 01:03:49 2011
@@ -46,6 +46,13 @@ import java.util.Set;
* <p/>
* This engine is experimental. Output from this engine is subject to change in future releases.
*
+ * <pre class="prettyprint" >
+ * &lt;searchComponent class="org.apache.solr.handler.clustering.ClusteringComponent" name="clustering"&gt;
+ * &lt;lst name="engine"&gt;
+ * &lt;str name="name"&gt;default&lt;/str&gt;
+ * &lt;str name="carrot.algorithm"&gt;org.carrot2.clustering.lingo.LingoClusteringAlgorithm&lt;/str&gt;
+ * &lt;/lst&gt;
+ * &lt;/searchComponent&gt;</pre>
*/
public class ClusteringComponent extends SearchComponent implements SolrCoreAware {
private transient static Logger log = LoggerFactory.getLogger(ClusteringComponent.class);
Modified: lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java Wed Feb 9 01:03:49 2011
@@ -77,6 +77,7 @@ public class CarrotClusteringEngine exte
private String idFieldName;
+ @Override
@Deprecated
public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) {
SolrIndexSearcher searcher = sreq.getSearcher();
@@ -90,6 +91,7 @@ public class CarrotClusteringEngine exte
}
}
+ @Override
public Object cluster(Query query, SolrDocumentList solrDocList,
Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
try {
Modified: lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/LuceneLanguageModelFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/LuceneLanguageModelFactory.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/LuceneLanguageModelFactory.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/LuceneLanguageModelFactory.java Wed Feb 9 01:03:49 2011
@@ -69,7 +69,8 @@ public class LuceneLanguageModelFactory
/**
* Provide an {@link IStemmer} implementation for a given language.
*/
- protected IStemmer createStemmer(LanguageCode language) {
+ @Override
+ protected IStemmer createStemmer(LanguageCode language) {
switch (language) {
case ARABIC:
return ArabicStemmerFactory.createStemmer();
Modified: lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java Wed Feb 9 01:03:49 2011
@@ -25,11 +25,13 @@ import org.apache.solr.search.DocSet;
*
**/
public class MockDocumentClusteringEngine extends DocumentClusteringEngine {
+ @Override
public NamedList cluster(DocSet docs, SolrParams solrParams) {
NamedList result = new NamedList();
return result;
}
+ @Override
public NamedList cluster(SolrParams solrParams) {
NamedList result = new NamedList();
return result;
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/build.xml?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/build.xml (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/build.xml Wed Feb 9 01:03:49 2011
@@ -56,7 +56,7 @@
<pathelement path="target/classes" />
<pathelement path="target/test-classes" />
<pathelement location="${solr-path}/build/tests"/> <!-- include solr test code -->
- <pathelement location="${solr-path}/../lucene/build/classes/test" /> <!-- include some lucene test code -->
+ <pathelement location="${solr-path}/../lucene/build/classes/test-framework" /> <!-- include some lucene test code -->
<pathelement path="${java.class.path}"/>
</path>
@@ -68,7 +68,7 @@
<pathelement path="target/test-classes" />
<pathelement path="target/extras/test-classes" />
<pathelement location="${solr-path}/build/tests"/> <!-- include solr test code -->
- <pathelement location="${solr-path}/../lucene/build/classes/test" /> <!-- include some lucene test code -->
+ <pathelement location="${solr-path}/../lucene/build/classes/test-framework" /> <!-- include some lucene test code -->
<pathelement path="${java.class.path}"/>
</path>
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/MailEntityProcessor.java Wed Feb 9 01:03:49 2011
@@ -51,6 +51,7 @@ public class MailEntityProcessor extends
public SearchTerm getCustomSearch(Folder folder);
}
+ @Override
public void init(Context context) {
super.init(context);
// set attributes using XXX getXXXFromContext(attribute, defaultValue);
@@ -95,6 +96,7 @@ public class MailEntityProcessor extends
logConfig();
}
+ @Override
public Map<String, Object> nextRow() {
Message mail;
Map<String, Object> row = null;
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/main/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java Wed Feb 9 01:03:49 2011
@@ -92,6 +92,7 @@ public class TikaEntityProcessor extends
done = false;
}
+ @Override
public Map<String, Object> nextRow() {
if(done) return null;
Map<String, Object> row = new HashMap<String, Object>();
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/extras/test/java/org/apache/solr/handler/dataimport/TestMailEntityProcessor.java Wed Feb 9 01:03:49 2011
@@ -191,18 +191,22 @@ public class TestMailEntityProcessor ext
super(null, ".", null);
}
+ @Override
public boolean upload(SolrInputDocument doc) {
return docs.add(doc);
}
+ @Override
public void log(int event, String name, Object row) {
// Do nothing
}
+ @Override
public void doDeleteAll() {
deleteAllCalled = Boolean.TRUE;
}
+ @Override
public void commit(boolean b) {
commitCalled = Boolean.TRUE;
}
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinContentStreamDataSource.java Wed Feb 9 01:03:49 2011
@@ -39,10 +39,12 @@ public class BinContentStreamDataSource
private InputStream in;
+ @Override
public void init(Context context, Properties initProps) {
this.context = (ContextImpl) context;
}
+ @Override
public InputStream getData(String query) {
contentStream = context.getDocBuilder().requestParameters.contentStream;
if (contentStream == null)
@@ -55,6 +57,7 @@ public class BinContentStreamDataSource
}
}
+ @Override
public void close() {
if (contentStream != null) {
try {
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinFileDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinFileDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinFileDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinFileDataSource.java Wed Feb 9 01:03:49 2011
@@ -43,10 +43,12 @@ import java.util.Properties;
public class BinFileDataSource extends DataSource<InputStream>{
protected String basePath;
+ @Override
public void init(Context context, Properties initProps) {
basePath = initProps.getProperty(FileDataSource.BASE_PATH);
}
+ @Override
public InputStream getData(String query) {
File f = FileDataSource.getFile(basePath,query);
try {
@@ -57,6 +59,7 @@ public class BinFileDataSource extends D
}
}
+ @Override
public void close() {
}
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinURLDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinURLDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinURLDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/BinURLDataSource.java Wed Feb 9 01:03:49 2011
@@ -49,6 +49,7 @@ public class BinURLDataSource extends Da
public BinURLDataSource() { }
+ @Override
public void init(Context context, Properties initProps) {
this.context = context;
this.initProps = initProps;
@@ -72,6 +73,7 @@ public class BinURLDataSource extends Da
}
}
+ @Override
public InputStream getData(String query) {
URL url = null;
try {
@@ -89,6 +91,7 @@ public class BinURLDataSource extends Da
}
}
+ @Override
public void close() { }
private String getInitPropWithReplacements(String propertyName) {
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/CachedSqlEntityProcessor.java Wed Feb 9 01:03:49 2011
@@ -38,6 +38,7 @@ import java.util.Map;
public class CachedSqlEntityProcessor extends SqlEntityProcessor {
private boolean isFirst;
+ @Override
@SuppressWarnings("unchecked")
public void init(Context context) {
super.init(context);
@@ -45,6 +46,7 @@ public class CachedSqlEntityProcessor ex
isFirst = true;
}
+ @Override
public Map<String, Object> nextRow() {
if (dataSourceRowCache != null)
return getFromRowCacheTransformed();
@@ -60,6 +62,7 @@ public class CachedSqlEntityProcessor ex
}
+ @Override
protected List<Map<String, Object>> getAllNonCachedRows() {
List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
String q = getQuery();
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ClobTransformer.java Wed Feb 9 01:03:49 2011
@@ -37,6 +37,7 @@ import java.util.Map;
* @since solr 1.4
*/
public class ClobTransformer extends Transformer {
+ @Override
public Object transformRow(Map<String, Object> aRow, Context context) {
for (Map<String, String> map : context.getAllEntityFields()) {
if (!TRUE.equals(map.get(CLOB))) continue;
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java Wed Feb 9 01:03:49 2011
@@ -39,10 +39,12 @@ public class ContentStreamDataSource ext
private ContentStream contentStream;
private Reader reader;
+ @Override
public void init(Context context, Properties initProps) {
this.context = (ContextImpl) context;
}
+ @Override
public Reader getData(String query) {
contentStream = context.getDocBuilder().requestParameters.contentStream;
if (contentStream == null)
@@ -55,6 +57,7 @@ public class ContentStreamDataSource ext
}
}
+ @Override
public void close() {
if (contentStream != null) {
try {
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java Wed Feb 9 01:03:49 2011
@@ -71,22 +71,27 @@ public class ContextImpl extends Context
parent = parentContext;
}
+ @Override
public String getEntityAttribute(String name) {
return entity == null ? null : entity.allAttributes.get(name);
}
+ @Override
public String getResolvedEntityAttribute(String name) {
return entity == null ? null : resolver.replaceTokens(entity.allAttributes.get(name));
}
+ @Override
public List<Map<String, String>> getAllEntityFields() {
return entity == null ? Collections.EMPTY_LIST : entity.allFieldsList;
}
+ @Override
public VariableResolver getVariableResolver() {
return resolver;
}
+ @Override
public DataSource getDataSource() {
if (ds != null) return ds;
if(entity == null) return null;
@@ -101,26 +106,32 @@ public class ContextImpl extends Context
return entity.dataSrc;
}
+ @Override
public DataSource getDataSource(String name) {
return dataImporter.getDataSourceInstance(entity, name, this);
}
+ @Override
public boolean isRootEntity() {
return entity.isDocRoot;
}
+ @Override
public String currentProcess() {
return currProcess;
}
+ @Override
public Map<String, Object> getRequestParameters() {
return requestParams;
}
+ @Override
public EntityProcessor getEntityProcessor() {
return entity == null ? null : entity.processor;
}
+ @Override
public void setSessionAttribute(String name, Object val, String scope) {
if(name == null) return;
if (Context.SCOPE_ENTITY.equals(scope)) {
@@ -148,6 +159,7 @@ public class ContextImpl extends Context
else entitySession.put(name, val);
}
+ @Override
public Object getSessionAttribute(String name, String scope) {
if (Context.SCOPE_ENTITY.equals(scope)) {
if (entitySession == null)
@@ -166,6 +178,7 @@ public class ContextImpl extends Context
return null;
}
+ @Override
public Context getParentContext() {
return parent;
}
@@ -187,15 +200,18 @@ public class ContextImpl extends Context
}
+ @Override
public SolrCore getSolrCore() {
return dataImporter == null ? null : dataImporter.getCore();
}
+ @Override
public Map<String, Object> getStats() {
return docBuilder != null ? docBuilder.importStatistics.getStatsSnapshot() : Collections.<String, Object>emptyMap();
}
+ @Override
public String getScript() {
if(dataImporter != null) {
DataConfig.Script script = dataImporter.getConfig().script;
@@ -204,6 +220,7 @@ public class ContextImpl extends Context
return null;
}
+ @Override
public String getScriptLanguage() {
if (dataImporter != null) {
DataConfig.Script script = dataImporter.getConfig().script;
@@ -212,12 +229,14 @@ public class ContextImpl extends Context
return null;
}
+ @Override
public void deleteDoc(String id) {
if(docBuilder != null){
docBuilder.writer.deleteDoc(id);
}
}
+ @Override
public void deleteDocByQuery(String query) {
if(docBuilder != null){
docBuilder.writer.deleteByQuery(query);
@@ -227,10 +246,12 @@ public class ContextImpl extends Context
DocBuilder getDocBuilder(){
return docBuilder;
}
+ @Override
public Object resolve(String var) {
return resolver.resolve(var);
}
+ @Override
public String replaceTokens(String template) {
return resolver.replaceTokens(template);
}
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataConfig.java Wed Feb 9 01:03:49 2011
@@ -214,6 +214,7 @@ public class DataConfig {
public Map<String, String> allAttributes = new HashMap<String, String>() {
+ @Override
public String put(String key, String value) {
if (super.containsKey(key))
return super.get(key);
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java Wed Feb 9 01:03:49 2011
@@ -423,6 +423,7 @@ public class DataImporter {
}
static final ThreadLocal<AtomicLong> QUERY_COUNT = new ThreadLocal<AtomicLong>() {
+ @Override
protected AtomicLong initialValue() {
return new AtomicLong();
}
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DateFormatTransformer.java Wed Feb 9 01:03:49 2011
@@ -45,6 +45,7 @@ public class DateFormatTransformer exten
private static final Logger LOG = LoggerFactory
.getLogger(DateFormatTransformer.class);
+ @Override
@SuppressWarnings("unchecked")
public Object transformRow(Map<String, Object> aRow, Context context) {
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DebugLogger.java Wed Feb 9 01:03:49 2011
@@ -60,6 +60,7 @@ class DebugLogger {
output = new NamedList();
debugStack = new Stack<DebugInfo>() {
+ @Override
public DebugInfo pop() {
if (size() == 1)
throw new DataImportHandlerException(
@@ -169,14 +170,17 @@ class DebugLogger {
DataSource wrapDs(final DataSource ds) {
return new DataSource() {
+ @Override
public void init(Context context, Properties initProps) {
ds.init(context, initProps);
}
+ @Override
public void close() {
ds.close();
}
+ @Override
public Object getData(String query) {
writer.log(SolrWriter.ENTITY_META, "query", query);
long start = System.currentTimeMillis();
@@ -203,6 +207,7 @@ class DebugLogger {
Transformer wrapTransformer(final Transformer t) {
return new Transformer() {
+ @Override
public Object transformRow(Map<String, Object> row, Context context) {
writer.log(SolrWriter.PRE_TRANSFORMER_ROW, null, row);
String tName = getTransformerName(t);
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DocBuilder.java Wed Feb 9 01:03:49 2011
@@ -139,6 +139,7 @@ public class DocBuilder {
document = dataImporter.getConfig().document;
final AtomicLong startTime = new AtomicLong(System.currentTimeMillis());
statusMessages.put(TIME_ELAPSED, new Object() {
+ @Override
public String toString() {
return getTimeElapsedSince(startTime.get());
}
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java Wed Feb 9 01:03:49 2011
@@ -49,6 +49,7 @@ public class EntityProcessorBase extends
protected String onError = ABORT;
+ @Override
public void init(Context context) {
rowIterator = null;
this.context = context;
@@ -86,14 +87,17 @@ public class EntityProcessorBase extends
}
}
+ @Override
public Map<String, Object> nextModifiedRowKey() {
return null;
}
+ @Override
public Map<String, Object> nextDeletedRowKey() {
return null;
}
+ @Override
public Map<String, Object> nextModifiedParentRowKey() {
return null;
}
@@ -105,11 +109,13 @@ public class EntityProcessorBase extends
* @return a row where the key is the name of the field and value can be any Object or a Collection of objects. Return
* null to signal end of rows
*/
+ @Override
public Map<String, Object> nextRow() {
return null;// do not do anything
}
+ @Override
public void destroy() {
/*no op*/
}
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorWrapper.java Wed Feb 9 01:03:49 2011
@@ -54,6 +54,7 @@ public class EntityProcessorWrapper exte
this.docBuilder = docBuilder;
}
+ @Override
public void init(Context context) {
rowcache = null;
this.context = context;
@@ -79,6 +80,7 @@ public class EntityProcessorWrapper exte
String[] transArr = transClasses.split(",");
transformers = new ArrayList<Transformer>() {
+ @Override
public boolean add(Transformer transformer) {
if (docBuilder != null && docBuilder.verboseDebug) {
transformer = docBuilder.writer.getDebugLogger().wrapTransformer(transformer);
@@ -135,6 +137,7 @@ public class EntityProcessorWrapper exte
o = clazz.newInstance();
}
+ @Override
public Object transformRow(Map<String, Object> aRow, Context context) {
try {
return meth.invoke(o, aRow);
@@ -223,6 +226,7 @@ public class EntityProcessorWrapper exte
&& Boolean.parseBoolean(oMap.get("$stopTransform").toString());
}
+ @Override
public Map<String, Object> nextRow() {
if (rowcache != null) {
return getFromRowCache();
@@ -252,6 +256,7 @@ public class EntityProcessorWrapper exte
}
}
+ @Override
public Map<String, Object> nextModifiedRowKey() {
Map<String, Object> row = delegate.nextModifiedRowKey();
row = applyTransformer(row);
@@ -259,6 +264,7 @@ public class EntityProcessorWrapper exte
return row;
}
+ @Override
public Map<String, Object> nextDeletedRowKey() {
Map<String, Object> row = delegate.nextDeletedRowKey();
row = applyTransformer(row);
@@ -266,10 +272,12 @@ public class EntityProcessorWrapper exte
return row;
}
+ @Override
public Map<String, Object> nextModifiedParentRowKey() {
return delegate.nextModifiedParentRowKey();
}
+ @Override
public void destroy() {
delegate.destroy();
}
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EvaluatorBag.java Wed Feb 9 01:03:49 2011
@@ -66,6 +66,7 @@ public class EvaluatorBag {
*/
public static Evaluator getSqlEscapingEvaluator() {
return new Evaluator() {
+ @Override
public String evaluate(String expression, Context context) {
List l = parseParams(expression, context.getVariableResolver());
if (l.size() != 1) {
@@ -90,6 +91,7 @@ public class EvaluatorBag {
*/
public static Evaluator getSolrQueryEscapingEvaluator() {
return new Evaluator() {
+ @Override
public String evaluate(String expression, Context context) {
List l = parseParams(expression, context.getVariableResolver());
if (l.size() != 1) {
@@ -109,6 +111,7 @@ public class EvaluatorBag {
*/
public static Evaluator getUrlEvaluator() {
return new Evaluator() {
+ @Override
public String evaluate(String expression, Context context) {
List l = parseParams(expression, context.getVariableResolver());
if (l.size() != 1) {
@@ -138,6 +141,7 @@ public class EvaluatorBag {
*/
public static Evaluator getDateFormatEvaluator() {
return new Evaluator() {
+ @Override
public String evaluate(String expression, Context context) {
List l = parseParams(expression, context.getVariableResolver());
if (l.size() != 2) {
@@ -288,6 +292,7 @@ public class EvaluatorBag {
}
+ @Override
public String toString() {
Object o = vr.resolve(varName);
return o == null ? null : o.toString();
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java Wed Feb 9 01:03:49 2011
@@ -52,6 +52,7 @@ public class FieldReaderDataSource exten
private String encoding;
private EntityProcessorWrapper entityProcessor;
+ @Override
public void init(Context context, Properties initProps) {
dataField = context.getEntityAttribute("dataField");
encoding = context.getEntityAttribute("encoding");
@@ -59,6 +60,7 @@ public class FieldReaderDataSource exten
/*no op*/
}
+ @Override
public Reader getData(String query) {
Object o = entityProcessor.getVariableResolver().resolve(dataField);
if (o == null) {
@@ -111,6 +113,7 @@ public class FieldReaderDataSource exten
}
}
+ @Override
public void close() {
}
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FieldStreamDataSource.java Wed Feb 9 01:03:49 2011
@@ -52,12 +52,14 @@ public class FieldStreamDataSource exten
protected String dataField;
private EntityProcessorWrapper wrapper;
+ @Override
public void init(Context context, Properties initProps) {
dataField = context.getEntityAttribute("dataField");
wrapper = (EntityProcessorWrapper) context.getEntityProcessor();
/*no op*/
}
+ @Override
public InputStream getData(String query) {
Object o = wrapper.getVariableResolver().resolve(dataField);
if (o == null) {
@@ -90,6 +92,7 @@ public class FieldStreamDataSource exten
}
+ @Override
public void close() {
}
}
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileDataSource.java Wed Feb 9 01:03:49 2011
@@ -59,6 +59,7 @@ public class FileDataSource extends Data
private static final Logger LOG = LoggerFactory.getLogger(FileDataSource.class);
+ @Override
public void init(Context context, Properties initProps) {
basePath = initProps.getProperty(BASE_PATH);
if (initProps.get(URLDataSource.ENCODING) != null)
@@ -79,6 +80,7 @@ public class FileDataSource extends Data
* returned Reader</b>
* </p>
*/
+ @Override
public Reader getData(String query) {
File f = getFile(basePath,query);
try {
@@ -130,6 +132,7 @@ public class FileDataSource extends Data
}
}
+ @Override
public void close() {
}
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/FileListEntityProcessor.java Wed Feb 9 01:03:49 2011
@@ -106,6 +106,7 @@ public class FileListEntityProcessor ext
private Pattern fileNamePattern, excludesPattern;
+ @Override
public void init(Context context) {
super.init(context);
fileName = context.getEntityAttribute(FILE_NAME);
@@ -195,6 +196,7 @@ public class FileListEntityProcessor ext
return Long.parseLong(sizeStr);
}
+ @Override
public Map<String, Object> nextRow() {
if (rowIterator != null)
return getNext();
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/JdbcDataSource.java Wed Feb 9 01:03:49 2011
@@ -54,6 +54,7 @@ public class JdbcDataSource extends
private int maxRows = 0;
+ @Override
public void init(Context context, Properties initProps) {
Object o = initProps.get(CONVERT_TYPE);
if (o != null)
@@ -204,6 +205,7 @@ public class JdbcDataSource extends
}
}
+ @Override
public Iterator<Map<String, Object>> getData(String query) {
ResultSetIterator r = new ResultSetIterator(query);
return r.getIterator();
@@ -370,6 +372,7 @@ public class JdbcDataSource extends
}
}
+ @Override
protected void finalize() throws Throwable {
try {
if(!isClosed){
@@ -383,6 +386,7 @@ public class JdbcDataSource extends
private boolean isClosed = false;
+ @Override
public void close() {
try {
closeConnection();
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LineEntityProcessor.java Wed Feb 9 01:03:49 2011
@@ -64,6 +64,7 @@ public class LineEntityProcessor extends
/**
* Parses each of the entity attributes.
*/
+ @Override
public void init(Context context) {
super.init(context);
String s;
@@ -97,6 +98,7 @@ public class LineEntityProcessor extends
* from the url. However transformers can be used to create as
* many other fields as required.
*/
+ @Override
public Map<String, Object> nextRow() {
if (reader == null) {
reader = new BufferedReader((Reader) context.getDataSource().getData(url));
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/LogTransformer.java Wed Feb 9 01:03:49 2011
@@ -35,6 +35,7 @@ import java.util.Map;
public class LogTransformer extends Transformer {
Logger LOG = LoggerFactory.getLogger(LogTransformer.class);
+ @Override
public Object transformRow(Map<String, Object> row, Context ctx) {
String expr = ctx.getEntityAttribute(LOG_TEMPLATE);
String level = ctx.replaceTokens(ctx.getEntityAttribute(LOG_LEVEL));
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/MockDataSource.java Wed Feb 9 01:03:49 2011
@@ -45,13 +45,16 @@ public class MockDataSource extends
cache.clear();
}
+ @Override
public void init(Context context, Properties initProps) {
}
+ @Override
public Iterator<Map<String, Object>> getData(String query) {
return cache.get(query);
}
+ @Override
public void close() {
cache.clear();
Modified: lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java (original)
+++ lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/NumberFormatTransformer.java Wed Feb 9 01:03:49 2011
@@ -49,6 +49,7 @@ public class NumberFormatTransformer ext
private static final Pattern localeRegex = Pattern.compile("^([a-z]{2})-([A-Z]{2})$");
+ @Override
@SuppressWarnings("unchecked")
public Object transformRow(Map<String, Object> row, Context context) {
for (Map<String, String> fld : context.getAllEntityFields()) {