You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by yo...@apache.org on 2007/01/08 02:05:19 UTC
svn commit: r493897 -
/lucene/java/trunk/src/test/org/apache/lucene/index/TestNorms.java
Author: yonik
Date: Sun Jan 7 17:05:19 2007
New Revision: 493897
URL: http://svn.apache.org/viewvc?view=rev&rev=493897
Log:
norms test: LUCENE-756
Added:
lucene/java/trunk/src/test/org/apache/lucene/index/TestNorms.java (with props)
Added: lucene/java/trunk/src/test/org/apache/lucene/index/TestNorms.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestNorms.java?view=auto&rev=493897
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestNorms.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestNorms.java Sun Jan 7 17:05:19 2007
@@ -0,0 +1,224 @@
+package org.apache.lucene.index;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+
+/**
+ * Test that norms info is preserved during index life - including separate norms, addDocument, addIndexes, optimize.
+ */
+public class TestNorms extends TestCase {
+
+ /**
+  * Similarity whose length normalization is always 1, whatever the field name
+  * or number of terms. Presumably used so that the stored norm depends only on
+  * the boost set in newDoc - confirm against the Similarity contract.
+  * Declared static because it uses no state of the enclosing test instance.
+  */
+ private static class SimilarityOne extends DefaultSimilarity {
+   public float lengthNorm(String fieldName, int numTerms) {
+     return 1;
+   }
+ }
+
+ // number of fields ("f0", "f1", ...) whose norms verifyIndex checks
+ private static final int NUM_FIELDS = 10;
+
+ // similarity installed on the writers opened by createIndex and addDocs
+ private Similarity similarityOne;
+ // analyzer handed to every IndexWriter opened by this test
+ private Analyzer anlzr;
+ // count of norm values produced by nextNorm; verifyIndex expects this many norms per field
+ private int numDocNorms;
+ // expected norm per document for every field except f1
+ private ArrayList norms;
+ // expected norm per document for field f1; updated by modifyNormsForF1
+ private ArrayList modifiedNorms;
+ // last norm value produced by nextNorm (nextNorm resets it to 0 once a value exceeds 10)
+ private float lastNorm = 0;
+ // increment nextNorm uses while searching for the next norm value
+ private float normDelta = (float) 0.001;
+
+ /**
+  * Creates the test case.
+  *
+  * @param s value handed on unchanged to the TestCase constructor
+  */
+ public TestNorms(final String s) {
+   super(s);
+ }
+
+ /** Creates the analyzer and the similarity used by the writers this test opens. */
+ protected void setUp() throws IOException {
+   this.anlzr = new StandardAnalyzer();
+   this.similarityOne = new SimilarityOne();
+ }
+
+ // Intentionally empty: testNorms closes the directories it opens itself.
+ protected void tearDown() throws IOException {
+ }
+
+ /**
+  * Test that norms values are preserved as the index is maintained.
+  * Including separate norms.
+  * Including merging indexes with separate norms.
+  * Including optimize.
+  *
+  * @throws IOException if java.io.tmpdir is undefined or an index operation fails
+  */
+ public void testNorms() throws IOException {
+   // tmp dir
+   String tempDir = System.getProperty("java.io.tmpdir");
+   if (tempDir == null) {
+     throw new IOException("java.io.tmpdir undefined, cannot run test");
+   }
+
+   Directory dir1 = null;
+   Directory dir2 = null;
+   Directory dir3 = null;
+   try {
+     // test with a single index: index1
+     dir1 = FSDirectory.getDirectory(new File(tempDir, "lucenetestindex1"), true);
+
+     norms = new ArrayList();
+     modifiedNorms = new ArrayList();
+
+     createIndex(dir1);
+     doTestNorms(dir1);
+
+     // keep the expected values of index1 aside and start from scratch for index2
+     ArrayList norms1 = norms;
+     ArrayList modifiedNorms1 = modifiedNorms;
+     int numDocNorms1 = numDocNorms;
+
+     norms = new ArrayList();
+     modifiedNorms = new ArrayList();
+     numDocNorms = 0;
+
+     // test with a second, independent single index: index2
+     dir2 = FSDirectory.getDirectory(new File(tempDir, "lucenetestindex2"), true);
+
+     createIndex(dir2);
+     doTestNorms(dir2);
+
+     // add index1 and index2 to a third index: index3
+     dir3 = FSDirectory.getDirectory(new File(tempDir, "lucenetestindex3"), true);
+
+     createIndex(dir3);
+     IndexWriter iw = new IndexWriter(dir3,anlzr,false);
+     try {
+       iw.setMaxBufferedDocs(5);
+       iw.setMergeFactor(3);
+       iw.addIndexes(new Directory[]{dir1,dir2});
+     } finally {
+       // close even when the merge fails so the writer is not left open
+       iw.close();
+     }
+
+     // expected values for index3: those of index1 followed by those of index2
+     norms1.addAll(norms);
+     norms = norms1;
+     modifiedNorms1.addAll(modifiedNorms);
+     modifiedNorms = modifiedNorms1;
+     numDocNorms += numDocNorms1;
+
+     // test with index3
+     verifyIndex(dir3);
+     doTestNorms(dir3);
+
+     // now with optimize
+     iw = new IndexWriter(dir3,anlzr,false);
+     try {
+       iw.setMaxBufferedDocs(5);
+       iw.setMergeFactor(3);
+       iw.optimize();
+     } finally {
+       iw.close();
+     }
+     verifyIndex(dir3);
+   } finally {
+     // release the directories even when an assertion above failed
+     closeDirectories(new Directory[]{dir1, dir2, dir3});
+   }
+ }
+
+ /**
+  * Closes every non-null directory in the array. All of them are attempted;
+  * the first IOException encountered, if any, is rethrown afterwards.
+  */
+ private static void closeDirectories(Directory[] dirs) throws IOException {
+   IOException firstFailure = null;
+   for (int i = 0; i < dirs.length; i++) {
+     if (dirs[i] == null) {
+       continue; // never opened because an earlier step failed
+     }
+     try {
+       dirs[i].close();
+     } catch (IOException e) {
+       if (firstFailure == null) {
+         firstFailure = e;
+       }
+     }
+   }
+   if (firstFailure != null) {
+     throw firstFailure;
+   }
+ }
+
+ private void doTestNorms(Directory dir) throws IOException {
+ for (int i=0; i<5; i++) {
+ addDocs(dir,12,true);
+ verifyIndex(dir);
+ modifyNormsForF1(dir);
+ verifyIndex(dir);
+ addDocs(dir,12,false);
+ verifyIndex(dir);
+ modifyNormsForF1(dir);
+ verifyIndex(dir);
+ }
+ }
+
+ private void createIndex(Directory dir) throws IOException {
+ IndexWriter iw = new IndexWriter(dir,anlzr,true);
+ iw.setMaxBufferedDocs(5);
+ iw.setMergeFactor(3);
+ iw.setSimilarity(similarityOne);
+ iw.setUseCompoundFile(true);
+ iw.close();
+ }
+
+ /**
+  * For every third document i, swaps its f1 norm with that of document
+  * k = (i*3) % modifiedNorms.size(), both in the index (via setNorm) and in
+  * the expected values kept in modifiedNorms.
+  *
+  * @param dir directory holding the index to modify
+  * @throws IOException if the index cannot be opened or updated
+  */
+ private void modifyNormsForF1(Directory dir) throws IOException {
+   IndexReader ir = IndexReader.open(dir);
+   try {
+     int n = ir.maxDoc();
+     for (int i = 0; i < n; i+=3) { // modify for every third doc
+       int k = (i*3) % modifiedNorms.size();
+       float origNorm = ((Float)modifiedNorms.get(i)).floatValue();
+       float newNorm = ((Float)modifiedNorms.get(k)).floatValue();
+       // keep the expected values in step with what is written to the index
+       modifiedNorms.set(i, new Float(newNorm));
+       modifiedNorms.set(k, new Float(origNorm));
+       ir.setNorm(i, "f1", newNorm);
+       ir.setNorm(k, "f1", origNorm);
+     }
+   } finally {
+     // always release the reader, even when a setNorm call fails
+     ir.close();
+   }
+ }
+
+
+ /**
+  * Checks, for each of the NUM_FIELDS fields, that the index holds exactly
+  * numDocNorms norms and that each decoded norm matches the expected value:
+  * modifiedNorms for field f1, norms for all other fields.
+  *
+  * @param dir directory holding the index to check
+  * @throws IOException if the index cannot be read
+  */
+ private void verifyIndex(Directory dir) throws IOException {
+   IndexReader ir = IndexReader.open(dir);
+   try {
+     for (int i = 0; i < NUM_FIELDS; i++) {
+       String field = "f"+i;
+       byte b[] = ir.norms(field);
+       assertEquals("number of norms mismatches",numDocNorms,b.length);
+       ArrayList storedNorms = (i==1 ? modifiedNorms : norms);
+       for (int j = 0; j < b.length; j++) {
+         float norm = Similarity.decodeNorm(b[j]);
+         float norm1 = ((Float)storedNorms.get(j)).floatValue();
+         // expected value first, value read from the index second
+         assertEquals("stored norm value of "+field+" for doc "+j+" is "+norm+" - a mismatch!", norm1, norm, 0.000001);
+       }
+     }
+   } finally {
+     // the reader was previously never closed; release it on success and on failure
+     ir.close();
+   }
+ }
+
+ /**
+  * Appends ndocs documents produced by newDoc to the existing index in dir.
+  *
+  * @param dir      directory holding the index to append to
+  * @param ndocs    number of documents to add
+  * @param compound value passed to setUseCompoundFile on the writer
+  * @throws IOException if the index cannot be opened or written
+  */
+ private void addDocs(Directory dir, int ndocs, boolean compound) throws IOException {
+   IndexWriter iw = new IndexWriter(dir,anlzr,false);
+   try {
+     iw.setMaxBufferedDocs(5);
+     iw.setMergeFactor(3);
+     iw.setSimilarity(similarityOne);
+     iw.setUseCompoundFile(compound);
+     for (int i = 0; i < ndocs; i++) {
+       iw.addDocument(newDoc());
+     }
+   } finally {
+     // close the writer even when adding a document fails
+     iw.close();
+   }
+ }
+
+ /**
+  * Creates the next document: one untokenized, unstored field per index
+  * 0..NUM_FIELDS-1 (named "f"+i with value "v"+i), all carrying the same
+  * boost obtained from a single call to nextNorm.
+  *
+  * @return the new document
+  */
+ private Document newDoc() {
+   Document d = new Document();
+   float boost = nextNorm();
+   // use NUM_FIELDS so the fields written always match those verifyIndex checks
+   for (int i = 0; i < NUM_FIELDS; i++) {
+     Field f = new Field("f"+i,"v"+i,Store.NO,Index.UN_TOKENIZED);
+     f.setBoost(boost);
+     d.add(f);
+   }
+   return d;
+ }
+
+ // return unique norm values that are unchanged by encoding/decoding
+ private float nextNorm() {
+ float norm = lastNorm + normDelta;
+ do {
+ float norm1 = Similarity.decodeNorm(Similarity.encodeNorm(norm));
+ if (norm1 > lastNorm) {
+ //System.out.println(norm1+" > "+lastNorm);
+ norm = norm1;
+ break;
+ }
+ norm += normDelta;
+ } while (true);
+ norms.add(numDocNorms, new Float(norm));
+ modifiedNorms.add(numDocNorms, new Float(norm));
+ //System.out.println("creating norm("+numDocNorms+"): "+norm);
+ numDocNorms ++;
+ lastNorm = (norm>10 ? 0 : norm); //there's a limit to how many distinct values can be stored in a ingle byte
+ return norm;
+ }
+
+}
Propchange: lucene/java/trunk/src/test/org/apache/lucene/index/TestNorms.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/trunk/src/test/org/apache/lucene/index/TestNorms.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: lucene/java/trunk/src/test/org/apache/lucene/index/TestNorms.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL