You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2014/04/04 12:27:14 UTC
svn commit: r1584603 [3/12] - in /lucene/dev/branches/solr5914: ./
dev-tools/ dev-tools/idea/solr/core/src/test/ lucene/ lucene/analysis/
lucene/analysis/common/
lucene/analysis/common/src/java/org/apache/lucene/analysis/br/
lucene/analysis/common/src/...
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java Fri Apr 4 10:27:05 2014
@@ -18,8 +18,8 @@ package org.apache.lucene.benchmark.byTa
*/
import java.io.File;
-import java.io.FileReader;
import java.io.Reader;
+import java.nio.charset.StandardCharsets;
import org.apache.lucene.benchmark.byTask.utils.Algorithm;
import org.apache.lucene.benchmark.byTask.utils.Config;
@@ -107,7 +107,7 @@ public class Benchmark {
Benchmark benchmark = null;
try {
- benchmark = new Benchmark(IOUtils.getDecodingReader(algFile, IOUtils.CHARSET_UTF_8));
+ benchmark = new Benchmark(IOUtils.getDecodingReader(algFile, StandardCharsets.UTF_8));
} catch (Exception e) {
e.printStackTrace();
System.exit(1);
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java Fri Apr 4 10:27:05 2014
@@ -18,7 +18,6 @@ package org.apache.lucene.benchmark.byTa
*/
import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.util.IOUtils;
import java.io.BufferedReader;
import java.io.File;
@@ -26,6 +25,7 @@ import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
@@ -206,7 +206,7 @@ public class DirContentSource extends Co
name = f.getCanonicalPath()+"_"+iteration;
}
- BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), IOUtils.CHARSET_UTF_8));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), StandardCharsets.UTF_8));
String line = null;
//First line is the date, 3rd is the title, rest is body
String dateStr = reader.readLine();
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java Fri Apr 4 10:27:05 2014
@@ -20,6 +20,7 @@ package org.apache.lucene.benchmark.byTa
import java.io.Closeable;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.util.Calendar;
@@ -318,7 +319,7 @@ public class DocMaker implements Closeab
if (storeBytes) {
Field bytesField = ds.getField(BYTES_FIELD, StringField.TYPE_STORED);
- bytesField.setBytesValue(bdy.getBytes("UTF-8"));
+ bytesField.setBytesValue(bdy.getBytes(StandardCharsets.UTF_8));
doc.add(bytesField);
}
}
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java Fri Apr 4 10:27:05 2014
@@ -20,18 +20,15 @@ package org.apache.lucene.benchmark.byTa
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
-import java.io.BufferedReader;
-import java.io.InputStreamReader;
-import java.nio.charset.CharsetDecoder;
-import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
-import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.ThreadInterruptedException;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
@@ -182,10 +179,7 @@ public class EnwikiContentSource extends
if (localFileIS != null) { // null means fileIS was closed on us
try {
// To work around a bug in XERCES (XERCESJ-1257), we assume the XML is always UTF8, so we simply provide reader.
- CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder()
- .onMalformedInput(CodingErrorAction.REPORT)
- .onUnmappableCharacter(CodingErrorAction.REPORT);
- reader.parse(new InputSource(new BufferedReader(new InputStreamReader(localFileIS, decoder))));
+ reader.parse(new InputSource(IOUtils.getDecodingReader(localFileIS, StandardCharsets.UTF_8)));
} catch (IOException ioe) {
synchronized(EnwikiContentSource.this) {
if (localFileIS != is) {
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java Fri Apr 4 10:27:05 2014
@@ -9,6 +9,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import java.io.*;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
@@ -62,12 +63,12 @@ public class FileBasedQueryMaker extends
Reader reader = null;
// note: we use a decoding reader, so if your queries are screwed up you know
if (file.exists()) {
- reader = IOUtils.getDecodingReader(file, IOUtils.CHARSET_UTF_8);
+ reader = IOUtils.getDecodingReader(file, StandardCharsets.UTF_8);
} else {
//see if we can find it as a resource
InputStream asStream = FileBasedQueryMaker.class.getClassLoader().getResourceAsStream(fileName);
if (asStream != null) {
- reader = IOUtils.getDecodingReader(asStream, IOUtils.CHARSET_UTF_8);
+ reader = IOUtils.getDecodingReader(asStream, StandardCharsets.UTF_8);
}
}
if (reader != null) {
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java Fri Apr 4 10:27:05 2014
@@ -29,6 +29,7 @@ import java.util.Properties;
import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
+import org.apache.lucene.util.IOUtils;
/**
* A {@link ContentSource} reading one line at a time as a
@@ -277,7 +278,7 @@ public class LineDocSource extends Conte
}
file = new File(fileName).getAbsoluteFile();
if (encoding == null) {
- encoding = "UTF-8";
+ encoding = IOUtils.UTF_8;
}
}
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java Fri Apr 4 10:27:05 2014
@@ -22,6 +22,7 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
@@ -30,7 +31,6 @@ import java.util.Date;
import java.util.Locale;
import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.util.IOUtils;
/**
* A {@link ContentSource} reading from the Reuters collection.
@@ -114,7 +114,7 @@ public class ReutersContentSource extend
name = f.getCanonicalPath() + "_" + iteration;
}
- BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), IOUtils.CHARSET_UTF_8));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), StandardCharsets.UTF_8));
try {
// First line is the date, 3rd is the title, rest is body
String dateStr = reader.readLine();
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java Fri Apr 4 10:27:05 2014
@@ -22,6 +22,7 @@ import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
@@ -320,7 +321,7 @@ public class TrecContentSource extends C
}
// encoding
if (encoding == null) {
- encoding = "ISO-8859-1";
+ encoding = StandardCharsets.ISO_8859_1.name();
}
// iteration exclusion in doc name
excludeDocnameIteration = config.get("content.source.excludeIteration", false);
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java Fri Apr 4 10:27:05 2014
@@ -20,19 +20,18 @@ package org.apache.lucene.benchmark.byTa
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.LogMergePolicy;
-import org.apache.lucene.index.TieredMergePolicy;
-import org.apache.lucene.index.MergeScheduler;
-import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.MergePolicy;
+import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.NoDeletionPolicy;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.NoMergeScheduler;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.util.Version;
import java.io.BufferedOutputStream;
@@ -130,7 +129,7 @@ public class CreateIndexTask extends Per
if (defaultCodec != null) {
try {
Class<? extends Codec> clazz = Class.forName(defaultCodec).asSubclass(Codec.class);
- Codec.setDefault(clazz.newInstance());
+ iwConf.setCodec(clazz.newInstance());
} catch (Exception e) {
throw new RuntimeException("Couldn't instantiate Codec: " + defaultCodec, e);
}
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTask.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTask.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTask.java Fri Apr 4 10:27:05 2014
@@ -5,6 +5,7 @@ import java.io.File;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
+import java.nio.charset.StandardCharsets;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
@@ -41,7 +42,7 @@ public class WriteEnwikiLineDocTask exte
public WriteEnwikiLineDocTask(PerfRunData runData) throws Exception {
super(runData);
OutputStream out = StreamUtils.outputStream(categoriesLineFile(new File(fname)));
- categoryLineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), StreamUtils.BUFFER_SIZE));
+ categoryLineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8), StreamUtils.BUFFER_SIZE));
writeHeader(categoryLineFileOut);
}
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java Fri Apr 4 10:27:05 2014
@@ -22,6 +22,7 @@ import java.io.File;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
+import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashSet;
import java.util.regex.Matcher;
@@ -101,7 +102,7 @@ public class WriteLineDocTask extends Pe
throw new IllegalArgumentException("line.file.out must be set");
}
OutputStream out = StreamUtils.outputStream(new File(fname));
- lineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), StreamUtils.BUFFER_SIZE));
+ lineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8), StreamUtils.BUFFER_SIZE));
docMaker = runData.getDocMaker();
// init fields
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java Fri Apr 4 10:27:05 2014
@@ -31,6 +31,7 @@ import java.io.File;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Set;
@@ -53,7 +54,7 @@ public class QueryDriver {
File topicsFile = new File(args[0]);
File qrelsFile = new File(args[1]);
- SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2], "UTF-8"), "lucene");
+ SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2], IOUtils.UTF_8 /* huh, no nio.Charset ctor? */), "lucene");
FSDirectory dir = FSDirectory.open(new File(args[3]));
String fieldSpec = args.length == 5 ? args[4] : "T"; // default to Title-only if not specified.
IndexReader reader = DirectoryReader.open(dir);
@@ -66,10 +67,10 @@ public class QueryDriver {
// use trec utilities to read trec topics into quality queries
TrecTopicsReader qReader = new TrecTopicsReader();
- QualityQuery qqs[] = qReader.readQueries(new BufferedReader(IOUtils.getDecodingReader(topicsFile, IOUtils.CHARSET_UTF_8)));
+ QualityQuery qqs[] = qReader.readQueries(new BufferedReader(IOUtils.getDecodingReader(topicsFile, StandardCharsets.UTF_8)));
// prepare judge, with trec utilities that read from a QRels file
- Judge judge = new TrecJudge(new BufferedReader(IOUtils.getDecodingReader(qrelsFile, IOUtils.CHARSET_UTF_8)));
+ Judge judge = new TrecJudge(new BufferedReader(IOUtils.getDecodingReader(qrelsFile, StandardCharsets.UTF_8)));
// validate topics & judgments match each other
judge.validateData(qqs, logger);
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java Fri Apr 4 10:27:05 2014
@@ -21,16 +21,13 @@ import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
+import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.lucene.util.IOUtils;
-
/**
* Split the Reuters SGML documents into Simple Text files containing: Title, Date, Dateline, Body
@@ -78,7 +75,7 @@ public class ExtractReuters {
*/
protected void extractFile(File sgmFile) {
try {
- BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(sgmFile), IOUtils.CHARSET_UTF_8));
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(sgmFile), StandardCharsets.UTF_8));
StringBuilder buffer = new StringBuilder(1024);
StringBuilder outBuffer = new StringBuilder(1024);
@@ -112,7 +109,7 @@ public class ExtractReuters {
File outFile = new File(outputDir, sgmFile.getName() + "-"
+ (docNumber++) + ".txt");
// System.out.println("Writing " + outFile);
- OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(outFile), IOUtils.CHARSET_UTF_8);
+ OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(outFile), StandardCharsets.UTF_8);
writer.write(out);
writer.close();
outBuffer.setLength(0);
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java Fri Apr 4 10:27:05 2014
@@ -22,6 +22,7 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
+import java.nio.charset.StandardCharsets;
import java.util.Properties;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
@@ -30,7 +31,6 @@ import org.apache.lucene.benchmark.byTas
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.Document;
-import org.apache.lucene.util.IOUtils;
/**
* Extract the downloaded Wikipedia dump into separate files for indexing.
@@ -86,7 +86,7 @@ public class ExtractWikipedia {
contents.append("\n");
try {
- Writer writer = new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8);
+ Writer writer = new OutputStreamWriter(new FileOutputStream(f), StandardCharsets.UTF_8);
writer.write(contents.toString());
writer.close();
} catch (IOException ioe) {
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Fri Apr 4 10:27:05 2014
@@ -21,6 +21,7 @@ import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import java.text.Collator;
import java.util.List;
import java.util.Locale;
@@ -406,7 +407,7 @@ public class TestPerfTasksLogic extends
BufferedReader r = new BufferedReader(
new InputStreamReader(
- new FileInputStream(lineFile), "UTF-8"));
+ new FileInputStream(lineFile), StandardCharsets.UTF_8));
int numLines = 0;
String line;
while((line = r.readLine()) != null) {
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java Fri Apr 4 10:27:05 2014
@@ -23,6 +23,7 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import org.apache.lucene.benchmark.byTask.feeds.AbstractQueryMaker;
@@ -121,7 +122,7 @@ public class TestPerfTasksParse extends
public boolean accept(File pathname) { return pathname.isFile() && pathname.getName().endsWith(".alg"); }
})) {
try {
- Config config = new Config(new InputStreamReader(new FileInputStream(algFile), "UTF-8"));
+ Config config = new Config(new InputStreamReader(new FileInputStream(algFile), StandardCharsets.UTF_8));
String contentSource = config.get("content.source", null);
if (contentSource != null) { Class.forName(contentSource); }
config.set("work.dir", TestUtil.createTempDir(LuceneTestCase.getTestClass().getSimpleName()).getAbsolutePath());
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java Fri Apr 4 10:27:05 2014
@@ -36,6 +36,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.util.IOUtils;
/** Tests the functionality of {@link DocMaker}. */
public class DocMakerTest extends BenchmarkTestCase {
@@ -166,7 +167,7 @@ public class DocMakerTest extends Benchm
// DocMaker did not close its ContentSource if resetInputs was called twice,
// leading to a file handle leak.
File f = new File(getWorkDir(), "docMakerLeak.txt");
- PrintStream ps = new PrintStream(f, "UTF-8");
+ PrintStream ps = new PrintStream(f, IOUtils.UTF_8);
ps.println("one title\t" + System.currentTimeMillis() + "\tsome content");
ps.close();
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSourceTest.java Fri Apr 4 10:27:05 2014
@@ -18,15 +18,13 @@ package org.apache.lucene.benchmark.byTa
*/
import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.io.OutputStreamWriter;
+import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.Properties;
import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
@@ -43,7 +41,7 @@ public class EnwikiContentSourceTest ext
@Override
protected InputStream openInputStream() throws IOException {
- return new ByteArrayInputStream(docs.getBytes(IOUtils.CHARSET_UTF_8));
+ return new ByteArrayInputStream(docs.getBytes(StandardCharsets.UTF_8));
}
}
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java Fri Apr 4 10:27:05 2014
@@ -23,6 +23,7 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
+import java.nio.charset.StandardCharsets;
import java.util.Properties;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
@@ -53,7 +54,7 @@ public class LineDocSourceTest extends B
private void createBZ2LineFile(File file, boolean addHeader) throws Exception {
OutputStream out = new FileOutputStream(file);
out = csFactory.createCompressorOutputStream("bzip2", out);
- BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8"));
+ BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8));
writeDocsToFile(writer, addHeader, null);
writer.close();
}
@@ -90,14 +91,14 @@ public class LineDocSourceTest extends B
private void createRegularLineFile(File file, boolean addHeader) throws Exception {
OutputStream out = new FileOutputStream(file);
- BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8"));
+ BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8));
writeDocsToFile(writer, addHeader, null);
writer.close();
}
private void createRegularLineFileWithMoreFields(File file, String...extraFields) throws Exception {
OutputStream out = new FileOutputStream(file);
- BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "utf-8"));
+ BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8));
Properties p = new Properties();
for (String f : extraFields) {
p.setProperty(f, f);
@@ -209,7 +210,7 @@ public class LineDocSourceTest extends B
for (int i = 0; i < testCases.length; i++) {
File file = new File(getWorkDir(), "one-line");
- BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "utf-8"));
+ BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8));
writer.write(testCases[i]);
writer.newLine();
writer.close();
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTaskTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTaskTest.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTaskTest.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTaskTest.java Fri Apr 4 10:27:05 2014
@@ -22,6 +22,7 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicInteger;
@@ -73,7 +74,7 @@ public class WriteEnwikiLineDocTaskTest
private void doReadTest(int n, File file, String expTitle, String expDate, String expBody) throws Exception {
InputStream in = new FileInputStream(file);
- BufferedReader br = new BufferedReader(new InputStreamReader(in, "utf-8"));
+ BufferedReader br = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
try {
String line = br.readLine();
WriteLineDocTaskTest.assertHeaderLine(line);
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java Fri Apr 4 10:27:05 2014
@@ -22,6 +22,7 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;
@@ -168,7 +169,7 @@ public class WriteLineDocTaskTest extend
default:
assertFalse("Unknown file type!",true); //fail, should not happen
}
- BufferedReader br = new BufferedReader(new InputStreamReader(in, "utf-8"));
+ BufferedReader br = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
try {
String line = br.readLine();
assertHeaderLine(line);
@@ -274,7 +275,7 @@ public class WriteLineDocTaskTest extend
wldt.doLogic();
wldt.close();
- BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));
+ BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8));
try {
String line = br.readLine();
assertHeaderLine(line);
@@ -292,7 +293,7 @@ public class WriteLineDocTaskTest extend
wldt.doLogic();
wldt.close();
- BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));
+ BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8));
try {
String line = br.readLine();
assertHeaderLine(line);
@@ -310,7 +311,7 @@ public class WriteLineDocTaskTest extend
wldt.doLogic();
wldt.close();
- BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));
+ BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8));
try {
String line = br.readLine();
assertHeaderLine(line);
@@ -345,7 +346,7 @@ public class WriteLineDocTaskTest extend
wldt.close();
Set<String> ids = new HashSet<>();
- BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));
+ BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8));
try {
String line = br.readLine();
assertHeaderLine(line); // header line is written once, no matter how many threads there are
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java Fri Apr 4 10:27:05 2014
@@ -26,10 +26,10 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
+import java.nio.charset.StandardCharsets;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.lucene.benchmark.BenchmarkTestCase;
-import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.TestUtil;
import org.junit.After;
import org.junit.Before;
@@ -87,7 +87,7 @@ public class StreamUtilsTest extends Ben
private File rawTextFile(String ext) throws Exception {
File f = new File(testDir,"testfile." + ext);
- BufferedWriter w = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8));
+ BufferedWriter w = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), StandardCharsets.UTF_8));
w.write(TEXT);
w.newLine();
w.close();
@@ -116,7 +116,7 @@ public class StreamUtilsTest extends Ben
}
private void writeText(OutputStream os) throws IOException {
- BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os, IOUtils.CHARSET_UTF_8));
+ BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8));
w.write(TEXT);
w.newLine();
w.close();
@@ -124,7 +124,7 @@ public class StreamUtilsTest extends Ben
private void assertReadText(File f) throws Exception {
InputStream ir = StreamUtils.inputStream(f);
- InputStreamReader in = new InputStreamReader(ir, IOUtils.CHARSET_UTF_8);
+ InputStreamReader in = new InputStreamReader(ir, StandardCharsets.UTF_8);
BufferedReader r = new BufferedReader(in);
String line = r.readLine();
assertEquals("Wrong text found in "+f.getName(), TEXT, line);
@@ -136,14 +136,14 @@ public class StreamUtilsTest extends Ben
public void setUp() throws Exception {
super.setUp();
testDir = new File(getWorkDir(),"ContentSourceTest");
- TestUtil.rmDir(testDir);
+ TestUtil.rm(testDir);
assertTrue(testDir.mkdirs());
}
@Override
@After
public void tearDown() throws Exception {
- TestUtil.rmDir(testDir);
+ TestUtil.rm(testDir);
super.tearDown();
}
Modified: lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java (original)
+++ lucene/dev/branches/solr5914/lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java Fri Apr 4 10:27:05 2014
@@ -34,6 +34,7 @@ import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
/**
* Test that quality run does its job.
@@ -62,11 +63,11 @@ public class TestQualityRun extends Benc
// prepare topics
InputStream topics = getClass().getResourceAsStream("trecTopics.txt");
TrecTopicsReader qReader = new TrecTopicsReader();
- QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new InputStreamReader(topics, "UTF-8")));
+ QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new InputStreamReader(topics, StandardCharsets.UTF_8)));
// prepare judge
InputStream qrels = getClass().getResourceAsStream("trecQRels.txt");
- Judge judge = new TrecJudge(new BufferedReader(new InputStreamReader(qrels, "UTF-8")));
+ Judge judge = new TrecJudge(new BufferedReader(new InputStreamReader(qrels, StandardCharsets.UTF_8)));
// validate topics & judgments match each other
judge.validateData(qqs, logger);
@@ -147,7 +148,7 @@ public class TestQualityRun extends Benc
InputStream topicsFile = getClass().getResourceAsStream("trecTopics.txt");
TrecTopicsReader qReader = new TrecTopicsReader();
QualityQuery qqs[] = qReader.readQueries(
- new BufferedReader(new InputStreamReader(topicsFile, "UTF-8")));
+ new BufferedReader(new InputStreamReader(topicsFile, StandardCharsets.UTF_8)));
assertEquals(20, qqs.length);
Modified: lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java (original)
+++ lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java Fri Apr 4 10:27:05 2014
@@ -177,7 +177,10 @@ public class BlockTermsReader extends Fi
}
private void seekDir(IndexInput input, long dirOffset) throws IOException {
- if (version >= BlockTermsWriter.VERSION_APPEND_ONLY) {
+ if (version >= BlockTermsWriter.VERSION_CHECKSUM) {
+ input.seek(input.length() - CodecUtil.footerLength() - 8);
+ dirOffset = input.readLong();
+ } else if (version >= BlockTermsWriter.VERSION_APPEND_ONLY) {
input.seek(input.length() - 8);
dirOffset = input.readLong();
}
@@ -863,4 +866,14 @@ public class BlockTermsReader extends Fi
sizeInBytes += (indexReader!=null) ? indexReader.ramBytesUsed() : 0;
return sizeInBytes;
}
+
+ @Override
+ public void checkIntegrity() throws IOException {
+ // verify terms
+ if (version >= BlockTermsWriter.VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(in);
+ }
+ // verify postings
+ postingsReader.checkIntegrity();
+ }
}
Modified: lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java (original)
+++ lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java Fri Apr 4 10:27:05 2014
@@ -63,12 +63,13 @@ public class BlockTermsWriter extends Fi
public static final int VERSION_START = 0;
public static final int VERSION_APPEND_ONLY = 1;
public static final int VERSION_META_ARRAY = 2;
- public static final int VERSION_CURRENT = VERSION_META_ARRAY;
+ public static final int VERSION_CHECKSUM = 3;
+ public static final int VERSION_CURRENT = VERSION_CHECKSUM;
/** Extension of terms file */
static final String TERMS_EXTENSION = "tib";
- protected final IndexOutput out;
+ protected IndexOutput out;
final PostingsWriterBase postingsWriter;
final FieldInfos fieldInfos;
FieldInfo currentField;
@@ -176,26 +177,30 @@ public class BlockTermsWriter extends Fi
}
public void close() throws IOException {
- try {
- final long dirStart = out.getFilePointer();
-
- out.writeVInt(fields.size());
- for(FieldMetaData field : fields) {
- out.writeVInt(field.fieldInfo.number);
- out.writeVLong(field.numTerms);
- out.writeVLong(field.termsStartPointer);
- if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
- out.writeVLong(field.sumTotalTermFreq);
- }
- out.writeVLong(field.sumDocFreq);
- out.writeVInt(field.docCount);
- if (VERSION_CURRENT >= VERSION_META_ARRAY) {
- out.writeVInt(field.longsSize);
+ if (out != null) {
+ try {
+ final long dirStart = out.getFilePointer();
+
+ out.writeVInt(fields.size());
+ for(FieldMetaData field : fields) {
+ out.writeVInt(field.fieldInfo.number);
+ out.writeVLong(field.numTerms);
+ out.writeVLong(field.termsStartPointer);
+ if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+ out.writeVLong(field.sumTotalTermFreq);
+ }
+ out.writeVLong(field.sumDocFreq);
+ out.writeVInt(field.docCount);
+ if (VERSION_CURRENT >= VERSION_META_ARRAY) {
+ out.writeVInt(field.longsSize);
+ }
}
+ writeTrailer(dirStart);
+ CodecUtil.writeFooter(out);
+ } finally {
+ IOUtils.close(out, postingsWriter, termsIndexWriter);
+ out = null;
}
- writeTrailer(dirStart);
- } finally {
- IOUtils.close(out, postingsWriter, termsIndexWriter);
}
}
Modified: lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java (original)
+++ lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexReader.java Fri Apr 4 10:27:05 2014
@@ -66,6 +66,8 @@ public class FixedGapTermsIndexReader ex
// start of the field info data
private long dirOffset;
+ private int version;
+
public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, Comparator<BytesRef> termComp, String segmentSuffix, IOContext context)
throws IOException {
@@ -78,6 +80,11 @@ public class FixedGapTermsIndexReader ex
try {
readHeader(in);
+
+ if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(in);
+ }
+
indexInterval = in.readVInt();
if (indexInterval < 1) {
throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + in + ")");
@@ -124,7 +131,7 @@ public class FixedGapTermsIndexReader ex
}
private void readHeader(IndexInput input) throws IOException {
- CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
+ version = CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
FixedGapTermsIndexWriter.VERSION_CURRENT, FixedGapTermsIndexWriter.VERSION_CURRENT);
}
@@ -273,7 +280,11 @@ public class FixedGapTermsIndexReader ex
public void close() throws IOException {}
private void seekDir(IndexInput input, long dirOffset) throws IOException {
- input.seek(input.length() - 8);
+ if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) {
+ input.seek(input.length() - CodecUtil.footerLength() - 8);
+ } else {
+ input.seek(input.length() - 8);
+ }
dirOffset = input.readLong();
input.seek(dirOffset);
}
Modified: lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/FixedGapTermsIndexWriter.java Fri Apr 4 10:27:05 2014
@@ -26,7 +26,6 @@ import org.apache.lucene.index.IndexFile
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts;
@@ -43,7 +42,7 @@ import java.io.IOException;
*
* @lucene.experimental */
public class FixedGapTermsIndexWriter extends TermsIndexWriterBase {
- protected final IndexOutput out;
+ protected IndexOutput out;
/** Extension of terms index file */
static final String TERMS_INDEX_EXTENSION = "tii";
@@ -52,7 +51,8 @@ public class FixedGapTermsIndexWriter ex
final static int VERSION_START = 0;
final static int VERSION_APPEND_ONLY = 1;
final static int VERSION_MONOTONIC_ADDRESSING = 2;
- final static int VERSION_CURRENT = VERSION_MONOTONIC_ADDRESSING;
+ final static int VERSION_CHECKSUM = 3;
+ final static int VERSION_CURRENT = VERSION_CHECKSUM;
final static int BLOCKSIZE = 4096;
final private int termIndexInterval;
@@ -207,38 +207,42 @@ public class FixedGapTermsIndexWriter ex
@Override
public void close() throws IOException {
- boolean success = false;
- try {
- final long dirStart = out.getFilePointer();
- final int fieldCount = fields.size();
-
- int nonNullFieldCount = 0;
- for(int i=0;i<fieldCount;i++) {
- SimpleFieldWriter field = fields.get(i);
- if (field.numIndexTerms > 0) {
- nonNullFieldCount++;
+ if (out != null) {
+ boolean success = false;
+ try {
+ final long dirStart = out.getFilePointer();
+ final int fieldCount = fields.size();
+
+ int nonNullFieldCount = 0;
+ for(int i=0;i<fieldCount;i++) {
+ SimpleFieldWriter field = fields.get(i);
+ if (field.numIndexTerms > 0) {
+ nonNullFieldCount++;
+ }
}
- }
-
- out.writeVInt(nonNullFieldCount);
- for(int i=0;i<fieldCount;i++) {
- SimpleFieldWriter field = fields.get(i);
- if (field.numIndexTerms > 0) {
- out.writeVInt(field.fieldInfo.number);
- out.writeVInt(field.numIndexTerms);
- out.writeVLong(field.termsStart);
- out.writeVLong(field.indexStart);
- out.writeVLong(field.packedIndexStart);
- out.writeVLong(field.packedOffsetsStart);
+
+ out.writeVInt(nonNullFieldCount);
+ for(int i=0;i<fieldCount;i++) {
+ SimpleFieldWriter field = fields.get(i);
+ if (field.numIndexTerms > 0) {
+ out.writeVInt(field.fieldInfo.number);
+ out.writeVInt(field.numIndexTerms);
+ out.writeVLong(field.termsStart);
+ out.writeVLong(field.indexStart);
+ out.writeVLong(field.packedIndexStart);
+ out.writeVLong(field.packedOffsetsStart);
+ }
}
- }
- writeTrailer(dirStart);
- success = true;
- } finally {
- if (success) {
- IOUtils.close(out);
- } else {
- IOUtils.closeWhileHandlingException(out);
+ writeTrailer(dirStart);
+ CodecUtil.writeFooter(out);
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(out);
+ } else {
+ IOUtils.closeWhileHandlingException(out);
+ }
+ out = null;
}
}
}
Modified: lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java (original)
+++ lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java Fri Apr 4 10:27:05 2014
@@ -62,6 +62,10 @@ public class VariableGapTermsIndexReader
try {
version = readHeader(in);
+
+ if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(in);
+ }
seekDir(in, dirOffset);
@@ -190,7 +194,10 @@ public class VariableGapTermsIndexReader
public void close() throws IOException {}
private void seekDir(IndexInput input, long dirOffset) throws IOException {
- if (version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) {
+ if (version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) {
+ input.seek(input.length() - CodecUtil.footerLength() - 8);
+ dirOffset = input.readLong();
+ } else if (version >= VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) {
input.seek(input.length() - 8);
dirOffset = input.readLong();
}
Modified: lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java Fri Apr 4 10:27:05 2014
@@ -45,7 +45,7 @@ import org.apache.lucene.util.fst.Util;
*
* @lucene.experimental */
public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
- protected final IndexOutput out;
+ protected IndexOutput out;
/** Extension of terms index file */
static final String TERMS_INDEX_EXTENSION = "tiv";
@@ -53,7 +53,8 @@ public class VariableGapTermsIndexWriter
final static String CODEC_NAME = "VARIABLE_GAP_TERMS_INDEX";
final static int VERSION_START = 0;
final static int VERSION_APPEND_ONLY = 1;
- final static int VERSION_CURRENT = VERSION_APPEND_ONLY;
+ final static int VERSION_CHECKSUM = 2;
+ final static int VERSION_CURRENT = VERSION_CHECKSUM;
private final List<FSTFieldWriter> fields = new ArrayList<>();
@@ -290,30 +291,34 @@ public class VariableGapTermsIndexWriter
@Override
public void close() throws IOException {
- try {
- final long dirStart = out.getFilePointer();
- final int fieldCount = fields.size();
-
- int nonNullFieldCount = 0;
- for(int i=0;i<fieldCount;i++) {
- FSTFieldWriter field = fields.get(i);
- if (field.fst != null) {
- nonNullFieldCount++;
- }
- }
-
- out.writeVInt(nonNullFieldCount);
- for(int i=0;i<fieldCount;i++) {
- FSTFieldWriter field = fields.get(i);
- if (field.fst != null) {
- out.writeVInt(field.fieldInfo.number);
- out.writeVLong(field.indexStart);
+ if (out != null) {
+ try {
+ final long dirStart = out.getFilePointer();
+ final int fieldCount = fields.size();
+
+ int nonNullFieldCount = 0;
+ for(int i=0;i<fieldCount;i++) {
+ FSTFieldWriter field = fields.get(i);
+ if (field.fst != null) {
+ nonNullFieldCount++;
+ }
+ }
+
+ out.writeVInt(nonNullFieldCount);
+ for(int i=0;i<fieldCount;i++) {
+ FSTFieldWriter field = fields.get(i);
+ if (field.fst != null) {
+ out.writeVInt(field.fieldInfo.number);
+ out.writeVLong(field.indexStart);
+ }
+ }
+ writeTrailer(dirStart);
+ CodecUtil.writeFooter(out);
+ } finally {
+ out.close();
+ out = null;
}
}
- writeTrailer(dirStart);
- } finally {
- out.close();
- }
}
private void writeTrailer(long dirStart) throws IOException {
Modified: lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java (original)
+++ lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java Fri Apr 4 10:27:05 2014
@@ -39,8 +39,8 @@ import org.apache.lucene.index.SegmentRe
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -66,7 +66,7 @@ import org.apache.lucene.util.automaton.
* </p>
* <ul>
* <li>BloomFilter (.blm) --> Header, DelegatePostingsFormatName,
- * NumFilteredFields, Filter<sup>NumFilteredFields</sup></li>
+ * NumFilteredFields, Filter<sup>NumFilteredFields</sup>, Footer</li>
* <li>Filter --> FieldNumber, FuzzySet</li>
* <li>FuzzySet -->See {@link FuzzySet#serialize(DataOutput)}</li>
* <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
@@ -75,13 +75,16 @@ import org.apache.lucene.util.automaton.
* <li>NumFilteredFields --> {@link DataOutput#writeInt Uint32}</li>
* <li>FieldNumber --> {@link DataOutput#writeInt Uint32} The number of the
* field in this segment</li>
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* @lucene.experimental
*/
public final class BloomFilteringPostingsFormat extends PostingsFormat {
public static final String BLOOM_CODEC_NAME = "BloomFilter";
- public static final int BLOOM_CODEC_VERSION = 1;
+ public static final int VERSION_START = 1;
+ public static final int VERSION_CHECKSUM = 2;
+ public static final int VERSION_CURRENT = VERSION_CHECKSUM;
/** Extension of Bloom Filters file */
static final String BLOOM_EXTENSION = "blm";
@@ -157,12 +160,11 @@ public final class BloomFilteringPosting
String bloomFileName = IndexFileNames.segmentFileName(
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
- IndexInput bloomIn = null;
+ ChecksumIndexInput bloomIn = null;
boolean success = false;
try {
- bloomIn = state.directory.openInput(bloomFileName, state.context);
- CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
- BLOOM_CODEC_VERSION);
+ bloomIn = state.directory.openChecksumInput(bloomFileName, state.context);
+ int version = CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, VERSION_START, VERSION_CURRENT);
// // Load the hash function used in the BloomFilter
// hashFunction = HashFunction.forName(bloomIn.readString());
// Load the delegate postings format
@@ -178,6 +180,11 @@ public final class BloomFilteringPosting
FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
bloomsByFieldName.put(fieldInfo.name, bloom);
}
+ if (version >= VERSION_CHECKSUM) {
+ CodecUtil.checkFooter(bloomIn);
+ } else {
+ CodecUtil.checkEOF(bloomIn);
+ }
IOUtils.close(bloomIn);
success = true;
} finally {
@@ -390,6 +397,11 @@ public final class BloomFilteringPosting
}
return sizeInBytes;
}
+
+ @Override
+ public void checkIntegrity() throws IOException {
+ delegateFieldsProducer.checkIntegrity();
+ }
}
class BloomFilteredFieldsConsumer extends FieldsConsumer {
@@ -466,10 +478,8 @@ public final class BloomFilteringPosting
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
IndexOutput bloomOutput = null;
try {
- bloomOutput = state.directory
- .createOutput(bloomFileName, state.context);
- CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME,
- BLOOM_CODEC_VERSION);
+ bloomOutput = state.directory.createOutput(bloomFileName, state.context);
+ CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME, VERSION_CURRENT);
// remember the name of the postings format we will delegate to
bloomOutput.writeString(delegatePostingsFormat.getName());
@@ -481,6 +491,7 @@ public final class BloomFilteringPosting
bloomOutput.writeInt(fieldInfo.number);
saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo);
}
+ CodecUtil.writeFooter(bloomOutput);
} finally {
IOUtils.close(bloomOutput);
}
Modified: lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java (original)
+++ lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesConsumer.java Fri Apr 4 10:27:05 2014
@@ -40,7 +40,7 @@ import static org.apache.lucene.codecs.m
*/
class DirectDocValuesConsumer extends DocValuesConsumer {
- final IndexOutput data, meta;
+ IndexOutput data, meta;
final int maxDoc;
DirectDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
@@ -142,6 +142,10 @@ class DirectDocValuesConsumer extends Do
try {
if (meta != null) {
meta.writeVInt(-1); // write EOF marker
+ CodecUtil.writeFooter(meta); // write checksum
+ }
+ if (data != null) {
+ CodecUtil.writeFooter(data);
}
success = true;
} finally {
@@ -150,6 +154,7 @@ class DirectDocValuesConsumer extends Do
} else {
IOUtils.closeWhileHandlingException(data, meta);
}
+ data = meta = null;
}
}
Modified: lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java (original)
+++ lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectDocValuesProducer.java Fri Apr 4 10:27:05 2014
@@ -33,6 +33,7 @@ import org.apache.lucene.index.RandomAcc
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -65,6 +66,7 @@ class DirectDocValuesProducer extends Do
private final int maxDoc;
private final AtomicLong ramBytesUsed;
+ private final int version;
static final byte NUMBER = 0;
static final byte BYTES = 1;
@@ -72,22 +74,27 @@ class DirectDocValuesProducer extends Do
static final byte SORTED_SET = 3;
static final int VERSION_START = 0;
- static final int VERSION_CURRENT = VERSION_START;
+ static final int VERSION_CHECKSUM = 1;
+ static final int VERSION_CURRENT = VERSION_CHECKSUM;
DirectDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
maxDoc = state.segmentInfo.getDocCount();
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
// read in the entries from the metadata file.
- IndexInput in = state.directory.openInput(metaName, state.context);
+ ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
boolean success = false;
- final int version;
try {
version = CodecUtil.checkHeader(in, metaCodec,
VERSION_START,
VERSION_CURRENT);
readFields(in);
+ if (version >= VERSION_CHECKSUM) {
+ CodecUtil.checkFooter(in);
+ } else {
+ CodecUtil.checkEOF(in);
+ }
success = true;
} finally {
if (success) {
@@ -186,6 +193,13 @@ class DirectDocValuesProducer extends Do
}
@Override
+ public void checkIntegrity() throws IOException {
+ if (version >= VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(data);
+ }
+ }
+
+ @Override
public synchronized NumericDocValues getNumeric(FieldInfo field) throws IOException {
NumericDocValues instance = numericInstances.get(field.number);
if (instance == null) {
Modified: lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (original)
+++ lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java Fri Apr 4 10:27:05 2014
@@ -109,6 +109,7 @@ public final class DirectPostingsFormat
if (state.context.context != IOContext.Context.MERGE) {
FieldsProducer loadedPostings;
try {
+ postings.checkIntegrity();
loadedPostings = new DirectFields(state, postings, minSkipCount, lowFreqCutoff);
} finally {
postings.close();
@@ -157,6 +158,12 @@ public final class DirectPostingsFormat
}
return sizeInBytes;
}
+
+ @Override
+ public void checkIntegrity() throws IOException {
+ // if we read entirely into ram, we already validated.
+ // otherwise we returned the raw postings reader.
+ }
}
private final static class DirectField extends Terms {
Modified: lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java (original)
+++ lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java Fri Apr 4 10:27:05 2014
@@ -38,6 +38,7 @@ import org.apache.lucene.index.TermState
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
@@ -56,14 +57,13 @@ import org.apache.lucene.codecs.BlockTer
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.memory.FSTTermsReader.TermsReader;
/**
* FST-based terms dictionary reader.
*
* The FST index maps each term and its ord, and during seek
* the ord is used fetch metadata from a single block.
- * The term dictionary is fully memeory resident.
+ * The term dictionary is fully memory resident.
*
* @lucene.experimental
*/
@@ -71,8 +71,7 @@ public class FSTOrdTermsReader extends F
static final int INTERVAL = FSTOrdTermsWriter.SKIP_INTERVAL;
final TreeMap<String, TermsReader> fields = new TreeMap<>();
final PostingsReaderBase postingsReader;
- IndexInput indexIn = null;
- IndexInput blockIn = null;
+ int version;
//static final boolean TEST = false;
public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
@@ -80,11 +79,18 @@ public class FSTOrdTermsReader extends F
final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);
this.postingsReader = postingsReader;
+ ChecksumIndexInput indexIn = null;
+ IndexInput blockIn = null;
+ boolean success = false;
try {
- this.indexIn = state.directory.openInput(termsIndexFileName, state.context);
- this.blockIn = state.directory.openInput(termsBlockFileName, state.context);
- readHeader(indexIn);
+ indexIn = state.directory.openChecksumInput(termsIndexFileName, state.context);
+ blockIn = state.directory.openInput(termsBlockFileName, state.context);
+ version = readHeader(indexIn);
readHeader(blockIn);
+ if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(blockIn);
+ }
+
this.postingsReader.init(blockIn);
seekDir(blockIn);
@@ -100,12 +106,22 @@ public class FSTOrdTermsReader extends F
int longsSize = blockIn.readVInt();
FST<Long> index = new FST<>(indexIn, PositiveIntOutputs.getSingleton());
- TermsReader current = new TermsReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);
+ TermsReader current = new TermsReader(fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);
TermsReader previous = fields.put(fieldInfo.name, current);
- checkFieldSummary(state.segmentInfo, current, previous);
+ checkFieldSummary(state.segmentInfo, indexIn, blockIn, current, previous);
+ }
+ if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) {
+ CodecUtil.checkFooter(indexIn);
+ } else {
+ CodecUtil.checkEOF(indexIn);
}
+ success = true;
} finally {
- IOUtils.closeWhileHandlingException(indexIn, blockIn);
+ if (success) {
+ IOUtils.close(indexIn, blockIn);
+ } else {
+ IOUtils.closeWhileHandlingException(indexIn, blockIn);
+ }
}
}
@@ -115,10 +131,14 @@ public class FSTOrdTermsReader extends F
FSTOrdTermsWriter.TERMS_VERSION_CURRENT);
}
private void seekDir(IndexInput in) throws IOException {
- in.seek(in.length() - 8);
+ if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM) {
+ in.seek(in.length() - CodecUtil.footerLength() - 8);
+ } else {
+ in.seek(in.length() - 8);
+ }
in.seek(in.readLong());
}
- private void checkFieldSummary(SegmentInfo info, TermsReader field, TermsReader previous) throws IOException {
+ private void checkFieldSummary(SegmentInfo info, IndexInput indexIn, IndexInput blockIn, TermsReader field, TermsReader previous) throws IOException {
// #docs with field must be <= #docs
if (field.docCount < 0 || field.docCount > info.getDocCount()) {
throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + indexIn + ", " + blockIn + ")");
@@ -176,7 +196,7 @@ public class FSTOrdTermsReader extends F
final byte[] metaLongsBlock;
final byte[] metaBytesBlock;
- TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<Long> index) throws IOException {
+ TermsReader(FieldInfo fieldInfo, IndexInput blockIn, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<Long> index) throws IOException {
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
@@ -819,4 +839,9 @@ public class FSTOrdTermsReader extends F
}
return ramBytesUsed;
}
+
+ @Override
+ public void checkIntegrity() throws IOException {
+ postingsReader.checkIntegrity();
+ }
}
Modified: lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java (original)
+++ lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java Fri Apr 4 10:27:05 2014
@@ -73,9 +73,10 @@ import org.apache.lucene.util.fst.Util;
* </p>
*
* <ul>
- * <li>TermIndex(.tix) --> Header, TermFST<sup>NumFields</sup></li>
+ * <li>TermIndex(.tix) --> Header, TermFST<sup>NumFields</sup>, Footer</li>
* <li>TermFST --> {@link FST FST<long>}</li>
* <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
*
* <p>Notes:</p>
@@ -103,7 +104,7 @@ import org.apache.lucene.util.fst.Util;
* <ul>
* <li>TermBlock(.tbk) --> Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
* <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, SumTotalTermFreq?, SumDocFreq,
- * DocCount, LongsSize, DataBlock > <sup>NumFields</sup></li>
+ * DocCount, LongsSize, DataBlock > <sup>NumFields</sup>, Footer</li>
*
* <li>DataBlock --> StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
* SkipBlock, StatsBlock, MetaLongsBlock, MetaBytesBlock </li>
@@ -119,6 +120,7 @@ import org.apache.lucene.util.fst.Util;
* <li>NumTerms, SumTotalTermFreq, SumDocFreq, StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
* StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta, MetaLongsSkipStart, TotalTermFreq,
* LongDelta,--> {@link DataOutput#writeVLong VLong}</li>
+ * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes: </p>
* <ul>
@@ -148,7 +150,8 @@ public class FSTOrdTermsWriter extends F
static final String TERMS_BLOCK_EXTENSION = "tbk";
static final String TERMS_CODEC_NAME = "FST_ORD_TERMS_DICT";
public static final int TERMS_VERSION_START = 0;
- public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START;
+ public static final int TERMS_VERSION_CHECKSUM = 1;
+ public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_CHECKSUM;
public static final int SKIP_INTERVAL = 8;
final PostingsWriterBase postingsWriter;
@@ -218,36 +221,41 @@ public class FSTOrdTermsWriter extends F
}
public void close() throws IOException {
- IOException ioe = null;
- try {
- final long blockDirStart = blockOut.getFilePointer();
-
- // write field summary
- blockOut.writeVInt(fields.size());
- for (FieldMetaData field : fields) {
- blockOut.writeVInt(field.fieldInfo.number);
- blockOut.writeVLong(field.numTerms);
- if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
- blockOut.writeVLong(field.sumTotalTermFreq);
+ if (blockOut != null) {
+ IOException ioe = null;
+ try {
+ final long blockDirStart = blockOut.getFilePointer();
+
+ // write field summary
+ blockOut.writeVInt(fields.size());
+ for (FieldMetaData field : fields) {
+ blockOut.writeVInt(field.fieldInfo.number);
+ blockOut.writeVLong(field.numTerms);
+ if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+ blockOut.writeVLong(field.sumTotalTermFreq);
+ }
+ blockOut.writeVLong(field.sumDocFreq);
+ blockOut.writeVInt(field.docCount);
+ blockOut.writeVInt(field.longsSize);
+ blockOut.writeVLong(field.statsOut.getFilePointer());
+ blockOut.writeVLong(field.metaLongsOut.getFilePointer());
+ blockOut.writeVLong(field.metaBytesOut.getFilePointer());
+
+ field.skipOut.writeTo(blockOut);
+ field.statsOut.writeTo(blockOut);
+ field.metaLongsOut.writeTo(blockOut);
+ field.metaBytesOut.writeTo(blockOut);
+ field.dict.save(indexOut);
}
- blockOut.writeVLong(field.sumDocFreq);
- blockOut.writeVInt(field.docCount);
- blockOut.writeVInt(field.longsSize);
- blockOut.writeVLong(field.statsOut.getFilePointer());
- blockOut.writeVLong(field.metaLongsOut.getFilePointer());
- blockOut.writeVLong(field.metaBytesOut.getFilePointer());
-
- field.skipOut.writeTo(blockOut);
- field.statsOut.writeTo(blockOut);
- field.metaLongsOut.writeTo(blockOut);
- field.metaBytesOut.writeTo(blockOut);
- field.dict.save(indexOut);
+ writeTrailer(blockOut, blockDirStart);
+ CodecUtil.writeFooter(indexOut);
+ CodecUtil.writeFooter(blockOut);
+ } catch (IOException ioe2) {
+ ioe = ioe2;
+ } finally {
+ IOUtils.closeWhileHandlingException(ioe, blockOut, indexOut, postingsWriter);
+ blockOut = null;
}
- writeTrailer(blockOut, blockDirStart);
- } catch (IOException ioe2) {
- ioe = ioe2;
- } finally {
- IOUtils.closeWhileHandlingException(ioe, blockOut, indexOut, postingsWriter);
}
}
Modified: lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java (original)
+++ lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java Fri Apr 4 10:27:05 2014
@@ -59,7 +59,7 @@ import org.apache.lucene.codecs.CodecUti
* FST-based terms dictionary reader.
*
* The FST directly maps each term and its metadata,
- * it is memeory resident.
+ * it is memory resident.
*
* @lucene.experimental
*/
@@ -67,18 +67,21 @@ import org.apache.lucene.codecs.CodecUti
public class FSTTermsReader extends FieldsProducer {
final TreeMap<String, TermsReader> fields = new TreeMap<>();
final PostingsReaderBase postingsReader;
- final IndexInput in;
//static boolean TEST = false;
+ final int version;
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);
this.postingsReader = postingsReader;
- this.in = state.directory.openInput(termsFileName, state.context);
+ final IndexInput in = state.directory.openInput(termsFileName, state.context);
boolean success = false;
try {
- readHeader(in);
+ version = readHeader(in);
+ if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) {
+ CodecUtil.checksumEntireFile(in);
+ }
this.postingsReader.init(in);
seekDir(in);
@@ -92,13 +95,15 @@ public class FSTTermsReader extends Fiel
long sumDocFreq = in.readVLong();
int docCount = in.readVInt();
int longsSize = in.readVInt();
- TermsReader current = new TermsReader(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
+ TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
TermsReader previous = fields.put(fieldInfo.name, current);
- checkFieldSummary(state.segmentInfo, current, previous);
+ checkFieldSummary(state.segmentInfo, in, current, previous);
}
success = true;
} finally {
- if (!success) {
+ if (success) {
+ IOUtils.close(in);
+ } else {
IOUtils.closeWhileHandlingException(in);
}
}
@@ -110,10 +115,14 @@ public class FSTTermsReader extends Fiel
FSTTermsWriter.TERMS_VERSION_CURRENT);
}
private void seekDir(IndexInput in) throws IOException {
- in.seek(in.length() - 8);
+ if (version >= FSTTermsWriter.TERMS_VERSION_CHECKSUM) {
+ in.seek(in.length() - CodecUtil.footerLength() - 8);
+ } else {
+ in.seek(in.length() - 8);
+ }
in.seek(in.readLong());
}
- private void checkFieldSummary(SegmentInfo info, TermsReader field, TermsReader previous) throws IOException {
+ private void checkFieldSummary(SegmentInfo info, IndexInput in, TermsReader field, TermsReader previous) throws IOException {
// #docs with field must be <= #docs
if (field.docCount < 0 || field.docCount > info.getDocCount()) {
throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
@@ -150,7 +159,7 @@ public class FSTTermsReader extends Fiel
@Override
public void close() throws IOException {
try {
- IOUtils.close(in, postingsReader);
+ IOUtils.close(postingsReader);
} finally {
fields.clear();
}
@@ -165,7 +174,7 @@ public class FSTTermsReader extends Fiel
final int longsSize;
final FST<FSTTermOutputs.TermData> dict;
- TermsReader(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
+ TermsReader(FieldInfo fieldInfo, IndexInput in, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
@@ -729,4 +738,9 @@ public class FSTTermsReader extends Fiel
}
return ramBytesUsed;
}
+
+ @Override
+ public void checkIntegrity() throws IOException {
+ postingsReader.checkIntegrity();
+ }
}
Modified: lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java (original)
+++ lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java Fri Apr 4 10:27:05 2014
@@ -124,11 +124,12 @@ public class FSTTermsWriter extends Fiel
static final String TERMS_EXTENSION = "tmp";
static final String TERMS_CODEC_NAME = "FST_TERMS_DICT";
public static final int TERMS_VERSION_START = 0;
- public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_START;
+ public static final int TERMS_VERSION_CHECKSUM = 1;
+ public static final int TERMS_VERSION_CURRENT = TERMS_VERSION_CHECKSUM;
final PostingsWriterBase postingsWriter;
final FieldInfos fieldInfos;
- final IndexOutput out;
+ IndexOutput out;
final int maxDoc;
final List<FieldMetaData> fields = new ArrayList<>();
@@ -199,28 +200,32 @@ public class FSTTermsWriter extends Fiel
}
public void close() throws IOException {
- IOException ioe = null;
- try {
- // write field summary
- final long dirStart = out.getFilePointer();
-
- out.writeVInt(fields.size());
- for (FieldMetaData field : fields) {
- out.writeVInt(field.fieldInfo.number);
- out.writeVLong(field.numTerms);
- if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
- out.writeVLong(field.sumTotalTermFreq);
+ if (out != null) {
+ IOException ioe = null;
+ try {
+ // write field summary
+ final long dirStart = out.getFilePointer();
+
+ out.writeVInt(fields.size());
+ for (FieldMetaData field : fields) {
+ out.writeVInt(field.fieldInfo.number);
+ out.writeVLong(field.numTerms);
+ if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+ out.writeVLong(field.sumTotalTermFreq);
+ }
+ out.writeVLong(field.sumDocFreq);
+ out.writeVInt(field.docCount);
+ out.writeVInt(field.longsSize);
+ field.dict.save(out);
}
- out.writeVLong(field.sumDocFreq);
- out.writeVInt(field.docCount);
- out.writeVInt(field.longsSize);
- field.dict.save(out);
+ writeTrailer(out, dirStart);
+ CodecUtil.writeFooter(out);
+ } catch (IOException ioe2) {
+ ioe = ioe2;
+ } finally {
+ IOUtils.closeWhileHandlingException(ioe, out, postingsWriter);
+ out = null;
}
- writeTrailer(out, dirStart);
- } catch (IOException ioe2) {
- ioe = ioe2;
- } finally {
- IOUtils.closeWhileHandlingException(ioe, out, postingsWriter);
}
}
Modified: lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java (original)
+++ lucene/dev/branches/solr5914/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesConsumer.java Fri Apr 4 10:27:05 2014
@@ -59,7 +59,7 @@ import static org.apache.lucene.codecs.m
* Writer for {@link MemoryDocValuesFormat}
*/
class MemoryDocValuesConsumer extends DocValuesConsumer {
- final IndexOutput data, meta;
+ IndexOutput data, meta;
final int maxDoc;
final float acceptableOverheadRatio;
@@ -208,6 +208,10 @@ class MemoryDocValuesConsumer extends Do
try {
if (meta != null) {
meta.writeVInt(-1); // write EOF marker
+ CodecUtil.writeFooter(meta); // write checksum
+ }
+ if (data != null) {
+ CodecUtil.writeFooter(data);
}
success = true;
} finally {
@@ -216,6 +220,7 @@ class MemoryDocValuesConsumer extends Do
} else {
IOUtils.closeWhileHandlingException(data, meta);
}
+ data = meta = null;
}
}