You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/04/27 21:40:21 UTC
svn commit: r1097216 - in /lucene/dev/trunk: dev-tools/idea/.idea/
lucene/contrib/ant/lib/
lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/
lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/
lucene/contrib/highl...
Author: rmuir
Date: Wed Apr 27 19:40:18 2011
New Revision: 1097216
URL: http://svn.apache.org/viewvc?rev=1097216&view=rev
Log:
missing svn:eol-style
Modified:
lucene/dev/trunk/dev-tools/idea/.idea/projectCodeStyle.xml (props changed)
lucene/dev/trunk/lucene/contrib/ant/lib/JTIDY-LICENSE-MIT.txt (props changed)
lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/apache1.0.txt (props changed)
lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/apache1.1.txt (props changed)
lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/apache2.0.txt (props changed)
lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/cpl1.0.txt (props changed)
lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/epl1.0.txt (props changed)
lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/freebsd.txt (props changed)
lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/gpl1.0.txt (props changed)
lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/gpl2.0.txt (props changed)
lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/gpl3.0.txt (props changed)
lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/lgpl2.1.txt (props changed)
lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/lgpl3.txt (props changed)
lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/lpgl2.0.txt (props changed)
lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/mit.txt (props changed)
lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/mozilla1.1.txt (props changed)
lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/mozilla_eula_firefox3.txt (props changed)
lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/test-files/docs/mozilla_eula_thunderbird2.txt (props changed)
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/OffsetLimitTokenFilter.java (contents, props changed)
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java (contents, props changed)
lucene/dev/trunk/lucene/contrib/lucli/lib/jline-NOTICE.txt (props changed)
lucene/dev/trunk/lucene/lib/ant-junit-LICENSE-ASL.txt (props changed)
lucene/dev/trunk/lucene/lib/junit-NOTICE.txt (props changed)
lucene/dev/trunk/lucene/src/test-framework/overview.html (contents, props changed)
lucene/dev/trunk/lucene/src/tools/java/org/apache/lucene/validation/DependencyChecker.java (props changed)
lucene/dev/trunk/lucene/src/tools/java/org/apache/lucene/validation/LicenseType.java (props changed)
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java (props changed)
lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/path/TestPathHierarchyTokenizer.java (props changed)
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java (contents, props changed)
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java (contents, props changed)
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java (contents, props changed)
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java (contents, props changed)
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java (contents, props changed)
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java (contents, props changed)
lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/alt/AltPackageTaskTest.java (props changed)
lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/alt/AltTestTask.java (contents, props changed)
lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java (props changed)
lucene/dev/trunk/solr/contrib/analysis-extras/lib/icu4j-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/clustering/lib/carrot2-core-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/clustering/lib/jackson-mapper-asl-LICENSE-ASL.txt (props changed)
lucene/dev/trunk/solr/contrib/clustering/lib/jackson-mapper-asl-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/clustering/lib/mahout-math-LICENSE-ASL.txt (props changed)
lucene/dev/trunk/solr/contrib/clustering/lib/mahout-math-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/clustering/lib/simple-xml-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/asm-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/bcmail-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/bcprov-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/dom4j-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/icu4j-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/metadata-extractor-LICENSE-PD.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/poi-ooxml-LICENSE-ASL.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/poi-ooxml-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/poi-ooxml-schemas-LICENSE-ASL.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/poi-ooxml-schemas-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/poi-ooxml-scratchpad-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/poi-scratchpad-LICENSE-ASL.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/rome-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/tagsoup-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/xercesImpl-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/xml-apis-LICENSE-ASL.txt (props changed)
lucene/dev/trunk/solr/contrib/extraction/lib/xml-apis-NOTICE.txt (props changed)
lucene/dev/trunk/solr/contrib/uima/lib/uima-an-alchemy-LICENSE-ASL.txt (props changed)
lucene/dev/trunk/solr/contrib/uima/lib/uima-an-calais-LICENSE-ASL.txt (props changed)
lucene/dev/trunk/solr/contrib/uima/lib/uima-an-tagger-LICENSE-ASL.txt (props changed)
lucene/dev/trunk/solr/contrib/uima/lib/uima-an-wst-LICENSE-ASL.txt (props changed)
lucene/dev/trunk/solr/contrib/uima/lib/uimaj-core-LICENSE-ASL.txt (props changed)
lucene/dev/trunk/solr/example/example-DIH/solr/db/lib/hsqldb-NOTICE.txt (props changed)
lucene/dev/trunk/solr/example/solr/conf/mapping-FoldToASCII.txt (props changed)
lucene/dev/trunk/solr/lib/apache-solr-noggit-NOTICE.txt (props changed)
lucene/dev/trunk/solr/lib/guava-NOTICE.txt (props changed)
lucene/dev/trunk/solr/lib/jcl-over-slf4j-LICENSE-BSD_LIKE.txt (props changed)
lucene/dev/trunk/solr/lib/jcl-over-slf4j-NOTICE.txt (props changed)
lucene/dev/trunk/solr/lib/junit-NOTICE.txt (props changed)
lucene/dev/trunk/solr/lib/log4j-over-slf4j-LICENSE-BSD_LIKE.txt (props changed)
lucene/dev/trunk/solr/lib/log4j-over-slf4j-NOTICE.txt (props changed)
lucene/dev/trunk/solr/lib/servlet-api-LICENSE-ASL.txt (props changed)
lucene/dev/trunk/solr/lib/servlet-api-NOTICE.txt (props changed)
lucene/dev/trunk/solr/lib/slf4j-NOTICE.txt (props changed)
lucene/dev/trunk/solr/lib/slf4j-api-LICENSE-BSD_LIKE.txt (props changed)
lucene/dev/trunk/solr/lib/slf4j-api-NOTICE.txt (props changed)
lucene/dev/trunk/solr/lib/zookeeper-NOTICE.txt (props changed)
lucene/dev/trunk/solr/src/java/org/apache/solr/search/function/DoubleDocValues.java (props changed)
lucene/dev/trunk/solr/src/java/org/apache/solr/search/function/FloatDocValues.java (props changed)
lucene/dev/trunk/solr/src/java/org/apache/solr/search/function/IntDocValues.java (props changed)
lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/fst/FSTLookup.java (props changed)
lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/fst/InputStreamDataInput.java (props changed)
lucene/dev/trunk/solr/src/java/org/apache/solr/spelling/suggest/fst/OutputStreamDataOutput.java (props changed)
lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/Average.java (props changed)
lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/LookupBenchmarkTest.java (props changed)
lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/SuggesterFSTTest.java (props changed)
lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/SuggesterTSTTest.java (props changed)
lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/TermFreq.java (props changed)
lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/TermFreqArrayIterator.java (props changed)
lucene/dev/trunk/solr/src/test/org/apache/solr/spelling/suggest/fst/FSTLookupTest.java (props changed)
lucene/dev/trunk/solr/src/test/org/apache/solr/update/UpdateParamsTest.java (contents, props changed)
lucene/dev/trunk/solr/src/test/org/apache/solr/util/ExternalPaths.java (props changed)
Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/OffsetLimitTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/OffsetLimitTokenFilter.java?rev=1097216&r1=1097215&r2=1097216&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/OffsetLimitTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/OffsetLimitTokenFilter.java Wed Apr 27 19:40:18 2011
@@ -1 +1,57 @@
-package org.apache.lucene.search.highlight;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
/**
* This TokenFilter limits the number of tokens while indexing by adding up the
* current offset.
*/
public final class OffsetLimitTokenFilter extends TokenFilter {
private int offsetCount;
private OffsetAttribute offsetAttrib = getAttribute(OffsetAttribute.class);
private int offsetLimit;
public OffsetLimitTokenFilter(TokenStream input, int offsetLimit) {
super(input);
this.offsetLimit = offsetLimit;
}
@Override
public boolean incrementToken() throws IOException {
if (offsetCount < offsetLimit && input.incrementToken()) {
int offsetLength = offsetAttrib.endOffset() - offsetAttrib.startOffset();
offsetCount += offsetLength;
return true;
}
return false;
}
@Override
public void reset() throws IOException {
super.reset();
offsetCount = 0;
}
}
\ No newline at end of file
+package org.apache.lucene.search.highlight;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+
+/**
+ * This TokenFilter limits the number of tokens while indexing by adding up the
+ * current offset.
+ */
+public final class OffsetLimitTokenFilter extends TokenFilter {
+
+ private int offsetCount;
+ private OffsetAttribute offsetAttrib = getAttribute(OffsetAttribute.class);
+ private int offsetLimit;
+
+ public OffsetLimitTokenFilter(TokenStream input, int offsetLimit) {
+ super(input);
+ this.offsetLimit = offsetLimit;
+ }
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ if (offsetCount < offsetLimit && input.incrementToken()) {
+ int offsetLength = offsetAttrib.endOffset() - offsetAttrib.startOffset();
+ offsetCount += offsetLength;
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ offsetCount = 0;
+ }
+
+}
Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java?rev=1097216&r1=1097215&r2=1097216&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java Wed Apr 27 19:40:18 2011
@@ -1 +1,60 @@
-package org.apache.lucene.search.highlight;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase {
public void testFilter() throws Exception {
TokenStream stream = new MockTokenizer(new StringReader(
"short toolong evenmuchlongertext a ab toolong foo"),
MockTokenizer.WHITESPACE, false);
OffsetLimitTokenFilter filter = new OffsetLimitTokenFilter(stream, 10);
assertTokenStreamContents(filter, new String[] {"short", "toolong"});
stream = new MockTokenizer(new StringReader(
"short toolong evenmuchlongertext a ab toolong foo"),
MockTokenizer.WHITESPACE, false);
filter = new OffsetLimitTokenFilter(stream, 12);
assertTokenStreamContents(filter, new String[] {"short", "toolong"});
stream = new MockTokenizer(new StringReader(
"short toolong evenmuchlongertext a ab toolong foo"),
MockTokenizer.WHITESPACE, false);
filter = new OffsetLimitTokenFilter(stream, 30);
assertTokenStreamContents(filter, new String[] {"short", "toolong",
"evenmuchlongertext"});
checkOneTermReuse(new Analyzer() {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
return new OffsetLimitTokenFilter(new MockTokenizer(reader,
MockTokenizer.WHITESPACE, false), 10);
}
}, "llenges", "llenges");
}
}
\ No newline at end of file
+package org.apache.lucene.search.highlight;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase {
+
+ public void testFilter() throws Exception {
+ TokenStream stream = new MockTokenizer(new StringReader(
+ "short toolong evenmuchlongertext a ab toolong foo"),
+ MockTokenizer.WHITESPACE, false);
+ OffsetLimitTokenFilter filter = new OffsetLimitTokenFilter(stream, 10);
+ assertTokenStreamContents(filter, new String[] {"short", "toolong"});
+
+ stream = new MockTokenizer(new StringReader(
+ "short toolong evenmuchlongertext a ab toolong foo"),
+ MockTokenizer.WHITESPACE, false);
+ filter = new OffsetLimitTokenFilter(stream, 12);
+ assertTokenStreamContents(filter, new String[] {"short", "toolong"});
+
+ stream = new MockTokenizer(new StringReader(
+ "short toolong evenmuchlongertext a ab toolong foo"),
+ MockTokenizer.WHITESPACE, false);
+ filter = new OffsetLimitTokenFilter(stream, 30);
+ assertTokenStreamContents(filter, new String[] {"short", "toolong",
+ "evenmuchlongertext"});
+
+
+ checkOneTermReuse(new Analyzer() {
+
+ @Override
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ return new OffsetLimitTokenFilter(new MockTokenizer(reader,
+ MockTokenizer.WHITESPACE, false), 10);
+ }
+ }, "llenges", "llenges");
+ }
+}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/src/test-framework/overview.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/overview.html?rev=1097216&r1=1097215&r2=1097216&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/overview.html (original)
+++ lucene/dev/trunk/lucene/src/test-framework/overview.html Wed Apr 27 19:40:18 2011
@@ -1,28 +1,28 @@
-<html>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<head>
- <title>Apache Lucene Test Framework API</title>
-</head>
-<body>
-<p>
- The Lucene Test Framework is used by Lucene as the basis for its tests.
- The framework can also be used for testing third-party code that uses
- the Lucene API.
-</p>
-</body>
-</html>
+<html>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<head>
+ <title>Apache Lucene Test Framework API</title>
+</head>
+<body>
+<p>
+ The Lucene Test Framework is used by Lucene as the basis for its tests.
+ The framework can also be used for testing third-party code that uses
+ the Lucene API.
+</p>
+</body>
+</html>
Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java?rev=1097216&r1=1097215&r2=1097216&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java Wed Apr 27 19:40:18 2011
@@ -1,136 +1,136 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Locale;
-import java.util.Map;
-
-/**
- * Parser for trec doc content, invoked on doc text excluding <DOC> and <DOCNO>
- * which are handled in TrecContentSource. Required to be stateless and hence thread safe.
- */
-public abstract class TrecDocParser {
-
- /** Types of trec parse paths, */
- public enum ParsePathType { GOV2, FBIS, FT, FR94, LATIMES }
-
- /** trec parser type used for unknown extensions */
- public static final ParsePathType DEFAULT_PATH_TYPE = ParsePathType.GOV2;
-
- static final Map<ParsePathType,TrecDocParser> pathType2parser = new HashMap<ParsePathType,TrecDocParser>();
- static {
- pathType2parser.put(ParsePathType.GOV2, new TrecGov2Parser());
- pathType2parser.put(ParsePathType.FBIS, new TrecFBISParser());
- pathType2parser.put(ParsePathType.FR94, new TrecFR94Parser());
- pathType2parser.put(ParsePathType.FT, new TrecFTParser());
- pathType2parser.put(ParsePathType.LATIMES, new TrecLATimesParser());
- }
-
- static final Map<String,ParsePathType> pathName2Type = new HashMap<String,ParsePathType>();
- static {
- for (ParsePathType ppt : ParsePathType.values()) {
- pathName2Type.put(ppt.name().toUpperCase(Locale.ENGLISH),ppt);
- }
- }
-
- /** max length of walk up from file to its ancestors when looking for a known path type */
- private static final int MAX_PATH_LENGTH = 10;
-
- /**
- * Compute the path type of a file by inspecting name of file and its parents
- */
- public static ParsePathType pathType(File f) {
- int pathLength = 0;
- while (f != null && ++pathLength < MAX_PATH_LENGTH) {
- ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ENGLISH));
- if (ppt!=null) {
- return ppt;
- }
- f = f.getParentFile();
- }
- return DEFAULT_PATH_TYPE;
- }
-
- /**
- * parse the text prepared in docBuf into a result DocData,
- * no synchronization is required.
- * @param docData reusable result
- * @param name name that should be set to the result
- * @param trecSrc calling trec content source
- * @param docBuf text to parse
- * @param pathType type of parsed file, or null if unknown - may be used by
- * parsers to alter their behavior according to the file path type.
- */
- public abstract DocData parse(DocData docData, String name, TrecContentSource trecSrc,
- StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException;
-
- /**
- * strip tags from <code>buf</code>: each tag is replaced by a single blank.
- * @return text obtained when stripping all tags from <code>buf</code> (Input StringBuilder is unmodified).
- */
- public static String stripTags(StringBuilder buf, int start) {
- return stripTags(buf.substring(start),0);
- }
-
- /**
- * strip tags from input.
- * @see #stripTags(StringBuilder, int)
- */
- public static String stripTags(String buf, int start) {
- if (start>0) {
- buf = buf.substring(0);
- }
- return buf.replaceAll("<[^>]*>", " ");
- }
-
- /**
- * Extract from <code>buf</code> the text of interest within specified tags
- * @param buf entire input text
- * @param startTag tag marking start of text of interest
- * @param endTag tag marking end of text of interest
- * @param maxPos if ≥ 0 sets a limit on start of text of interest
- * @return text of interest or null if not found
- */
- public static String extract(StringBuilder buf, String startTag, String endTag, int maxPos, String noisePrefixes[]) {
- int k1 = buf.indexOf(startTag);
- if (k1>=0 && (maxPos<0 || k1<maxPos)) {
- k1 += startTag.length();
- int k2 = buf.indexOf(endTag,k1);
- if (k2>=0 && (maxPos<0 || k2<maxPos)) { // found end tag with allowed range
- if (noisePrefixes != null) {
- for (String noise : noisePrefixes) {
- int k1a = buf.indexOf(noise,k1);
- if (k1a>=0 && k1a<k2) {
- k1 = k1a + noise.length();
- }
- }
- }
- return buf.substring(k1,k2).trim();
- }
- }
- return null;
- }
-
- //public static void main(String[] args) {
- // System.out.println(stripTags("is it true that<space>2<<second space>><almost last space>1<one more space>?",0));
- //}
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+/**
+ * Parser for trec doc content, invoked on doc text excluding <DOC> and <DOCNO>
+ * which are handled in TrecContentSource. Required to be stateless and hence thread safe.
+ */
+public abstract class TrecDocParser {
+
+ /** Types of trec parse paths, */
+ public enum ParsePathType { GOV2, FBIS, FT, FR94, LATIMES }
+
+ /** trec parser type used for unknown extensions */
+ public static final ParsePathType DEFAULT_PATH_TYPE = ParsePathType.GOV2;
+
+ static final Map<ParsePathType,TrecDocParser> pathType2parser = new HashMap<ParsePathType,TrecDocParser>();
+ static {
+ pathType2parser.put(ParsePathType.GOV2, new TrecGov2Parser());
+ pathType2parser.put(ParsePathType.FBIS, new TrecFBISParser());
+ pathType2parser.put(ParsePathType.FR94, new TrecFR94Parser());
+ pathType2parser.put(ParsePathType.FT, new TrecFTParser());
+ pathType2parser.put(ParsePathType.LATIMES, new TrecLATimesParser());
+ }
+
+ static final Map<String,ParsePathType> pathName2Type = new HashMap<String,ParsePathType>();
+ static {
+ for (ParsePathType ppt : ParsePathType.values()) {
+ pathName2Type.put(ppt.name().toUpperCase(Locale.ENGLISH),ppt);
+ }
+ }
+
+ /** max length of walk up from file to its ancestors when looking for a known path type */
+ private static final int MAX_PATH_LENGTH = 10;
+
+ /**
+ * Compute the path type of a file by inspecting name of file and its parents
+ */
+ public static ParsePathType pathType(File f) {
+ int pathLength = 0;
+ while (f != null && ++pathLength < MAX_PATH_LENGTH) {
+ ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ENGLISH));
+ if (ppt!=null) {
+ return ppt;
+ }
+ f = f.getParentFile();
+ }
+ return DEFAULT_PATH_TYPE;
+ }
+
+ /**
+ * parse the text prepared in docBuf into a result DocData,
+ * no synchronization is required.
+ * @param docData reusable result
+ * @param name name that should be set to the result
+ * @param trecSrc calling trec content source
+ * @param docBuf text to parse
+ * @param pathType type of parsed file, or null if unknown - may be used by
+ * parsers to alter their behavior according to the file path type.
+ */
+ public abstract DocData parse(DocData docData, String name, TrecContentSource trecSrc,
+ StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException;
+
+ /**
+ * strip tags from <code>buf</code>: each tag is replaced by a single blank.
+ * @return text obtained when stripping all tags from <code>buf</code> (Input StringBuilder is unmodified).
+ */
+ public static String stripTags(StringBuilder buf, int start) {
+ return stripTags(buf.substring(start),0);
+ }
+
+ /**
+ * strip tags from input.
+ * @see #stripTags(StringBuilder, int)
+ */
+ public static String stripTags(String buf, int start) {
+ if (start>0) {
+ buf = buf.substring(0);
+ }
+ return buf.replaceAll("<[^>]*>", " ");
+ }
+
+ /**
+ * Extract from <code>buf</code> the text of interest within specified tags
+ * @param buf entire input text
+ * @param startTag tag marking start of text of interest
+ * @param endTag tag marking end of text of interest
+ * @param maxPos if ≥ 0 sets a limit on start of text of interest
+ * @return text of interest or null if not found
+ */
+ public static String extract(StringBuilder buf, String startTag, String endTag, int maxPos, String noisePrefixes[]) {
+ int k1 = buf.indexOf(startTag);
+ if (k1>=0 && (maxPos<0 || k1<maxPos)) {
+ k1 += startTag.length();
+ int k2 = buf.indexOf(endTag,k1);
+ if (k2>=0 && (maxPos<0 || k2<maxPos)) { // found end tag with allowed range
+ if (noisePrefixes != null) {
+ for (String noise : noisePrefixes) {
+ int k1a = buf.indexOf(noise,k1);
+ if (k1a>=0 && k1a<k2) {
+ k1 = k1a + noise.length();
+ }
+ }
+ }
+ return buf.substring(k1,k2).trim();
+ }
+ }
+ return null;
+ }
+
+ //public static void main(String[] args) {
+ // System.out.println(stripTags("is it true that<space>2<<second space>><almost last space>1<one more space>?",0));
+ //}
+
+}
Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java?rev=1097216&r1=1097215&r2=1097216&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java Wed Apr 27 19:40:18 2011
@@ -1,65 +1,65 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Date;
-
-/**
- * Parser for the FBIS docs in trec disks 4+5 collection format
- */
-public class TrecFBISParser extends TrecDocParser {
-
- private static final String HEADER = "<HEADER>";
- private static final String HEADER_END = "</HEADER>";
- private static final int HEADER_END_LENGTH = HEADER_END.length();
-
- private static final String DATE1 = "<DATE1>";
- private static final String DATE1_END = "</DATE1>";
-
- private static final String TI = "<TI>";
- private static final String TI_END = "</TI>";
-
- @Override
- public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
- StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
- int mark = 0; // that much is skipped
- // optionally skip some of the text, set date, title
- Date date = null;
- String title = null;
- int h1 = docBuf.indexOf(HEADER);
- if (h1>=0) {
- int h2 = docBuf.indexOf(HEADER_END,h1);
- mark = h2+HEADER_END_LENGTH;
- // date...
- String dateStr = extract(docBuf, DATE1, DATE1_END, h2, null);
- if (dateStr != null) {
- date = trecSrc.parseDate(dateStr);
- }
- // title...
- title = extract(docBuf, TI, TI_END, h2, null);
- }
- docData.clear();
- docData.setName(name);
- docData.setDate(date);
- docData.setTitle(title);
- docData.setBody(stripTags(docBuf, mark).toString());
- return docData;
- }
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Date;
+
+/**
+ * Parser for the FBIS docs in trec disks 4+5 collection format
+ */
+public class TrecFBISParser extends TrecDocParser {
+
+ private static final String HEADER = "<HEADER>";
+ private static final String HEADER_END = "</HEADER>";
+ private static final int HEADER_END_LENGTH = HEADER_END.length();
+
+ private static final String DATE1 = "<DATE1>";
+ private static final String DATE1_END = "</DATE1>";
+
+ private static final String TI = "<TI>";
+ private static final String TI_END = "</TI>";
+
+ @Override
+ public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
+ StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
+ int mark = 0; // number of leading chars of docBuf that are left out of the body
+ // when a header is present: skip it, and take date and title from it
+ Date date = null;
+ String title = null;
+ int h1 = docBuf.indexOf(HEADER);
+ if (h1>=0) {
+ int h2 = docBuf.indexOf(HEADER_END,h1);
+ mark = h2<0 ? 0 : h2+HEADER_END_LENGTH; // unterminated header: skip nothing instead of the bogus offset -1+length
+ // date... (h2 is passed as the search limit)
+ String dateStr = extract(docBuf, DATE1, DATE1_END, h2, null);
+ if (dateStr != null) {
+ date = trecSrc.parseDate(dateStr);
+ }
+ // title... (same search limit as the date)
+ title = extract(docBuf, TI, TI_END, h2, null);
+ }
+ docData.clear();
+ docData.setName(name);
+ docData.setDate(date);
+ docData.setTitle(title);
+ docData.setBody(stripTags(docBuf, mark).toString());
+ return docData;
+ }
+
+}
Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java?rev=1097216&r1=1097215&r2=1097216&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java Wed Apr 27 19:40:18 2011
@@ -1,66 +1,66 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Date;
-
-/**
- * Parser for the FR94 docs in trec disks 4+5 collection format
- */
-public class TrecFR94Parser extends TrecDocParser {
-
- private static final String TEXT = "<TEXT>";
- private static final int TEXT_LENGTH = TEXT.length();
- private static final String TEXT_END = "</TEXT>";
-
- private static final String DATE = "<DATE>";
- private static final String[] DATE_NOISE_PREFIXES = {
- "DATE:",
- "date:", //TODO improve date extraction for this format
- "t.c.",
- };
- private static final String DATE_END = "</DATE>";
-
- //TODO can we also extract title for this format?
-
- @Override
- public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
- StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
- int mark = 0; // that much is skipped
- // optionally skip some of the text, set date (no title?)
- Date date = null;
- int h1 = docBuf.indexOf(TEXT);
- if (h1>=0) {
- int h2 = docBuf.indexOf(TEXT_END,h1);
- mark = h1+TEXT_LENGTH;
- // date...
- String dateStr = extract(docBuf, DATE, DATE_END, h2, DATE_NOISE_PREFIXES);
- if (dateStr != null) {
- dateStr = stripTags(dateStr,0).toString();
- date = trecSrc.parseDate(dateStr.trim());
- }
- }
- docData.clear();
- docData.setName(name);
- docData.setDate(date);
- docData.setBody(stripTags(docBuf, mark).toString());
- return docData;
- }
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Date;
+
+/**
+ * Parser for the FR94 docs in trec disks 4+5 collection format
+ */
+public class TrecFR94Parser extends TrecDocParser {
+
+ private static final String TEXT = "<TEXT>";
+ private static final int TEXT_LENGTH = TEXT.length();
+ private static final String TEXT_END = "</TEXT>";
+
+ private static final String DATE = "<DATE>";
+ private static final String[] DATE_NOISE_PREFIXES = { // passed to extract() as the noise prefixes of the date text
+ "DATE:",
+ "date:", //TODO improve date extraction for this format
+ "t.c.",
+ };
+ private static final String DATE_END = "</DATE>";
+
+ //TODO can we also extract title for this format?
+
+ @Override
+ public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
+ StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
+ int mark = 0; // number of leading chars of docBuf that are left out of the body
+ // when a TEXT tag is present: start the body right after it, and set the date (no title?)
+ Date date = null;
+ int h1 = docBuf.indexOf(TEXT);
+ if (h1>=0) {
+ int h2 = docBuf.indexOf(TEXT_END,h1); // -1 when the end tag is missing
+ mark = h1+TEXT_LENGTH;
+ // date... (h2 is passed as the search limit)
+ String dateStr = extract(docBuf, DATE, DATE_END, h2, DATE_NOISE_PREFIXES);
+ if (dateStr != null) {
+ dateStr = stripTags(dateStr,0).toString(); // tags nested inside the date text are stripped before parsing
+ date = trecSrc.parseDate(dateStr.trim());
+ }
+ }
+ docData.clear();
+ docData.setName(name);
+ docData.setDate(date);
+ docData.setBody(stripTags(docBuf, mark).toString());
+ return docData;
+ }
+
+}
Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java?rev=1097216&r1=1097215&r2=1097216&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java Wed Apr 27 19:40:18 2011
@@ -1,57 +1,57 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Date;
-
-/**
- * Parser for the FT docs in trec disks 4+5 collection format
- */
-public class TrecFTParser extends TrecDocParser {
-
- private static final String DATE = "<DATE>";
- private static final String DATE_END = "</DATE>";
-
- private static final String HEADLINE = "<HEADLINE>";
- private static final String HEADLINE_END = "</HEADLINE>";
-
- @Override
- public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
- StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
- int mark = 0; // that much is skipped
-
- // date...
- Date date = null;
- String dateStr = extract(docBuf, DATE, DATE_END, -1, null);
- if (dateStr != null) {
- date = trecSrc.parseDate(dateStr);
- }
-
- // title...
- String title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null);
-
- docData.clear();
- docData.setName(name);
- docData.setDate(date);
- docData.setTitle(title);
- docData.setBody(stripTags(docBuf, mark).toString());
- return docData;
- }
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Date;
+
+/**
+ * Parser for the FT docs in trec disks 4+5 collection format
+ */
+public class TrecFTParser extends TrecDocParser {
+
+ private static final String DATE = "<DATE>";
+ private static final String DATE_END = "</DATE>";
+
+ private static final String HEADLINE = "<HEADLINE>";
+ private static final String HEADLINE_END = "</HEADLINE>";
+
+ @Override
+ public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
+ StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
+ int mark = 0; // number of leading chars left out of the body; stays 0 here, so nothing is skipped
+
+ // date... (-1: no limit on where the tags may be found)
+ Date date = null;
+ String dateStr = extract(docBuf, DATE, DATE_END, -1, null);
+ if (dateStr != null) {
+ date = trecSrc.parseDate(dateStr);
+ }
+
+ // title... (taken from the HEADLINE tags, null when absent)
+ String title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null);
+
+ docData.clear();
+ docData.setName(name);
+ docData.setDate(date);
+ docData.setTitle(title);
+ docData.setBody(stripTags(docBuf, mark).toString());
+ return docData;
+ }
+
+}
Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java?rev=1097216&r1=1097215&r2=1097216&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java Wed Apr 27 19:40:18 2011
@@ -1,71 +1,71 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Date;
-
-/**
- * Parser for the FT docs in trec disks 4+5 collection format
- */
-public class TrecLATimesParser extends TrecDocParser {
-
- private static final String DATE = "<DATE>";
- private static final String DATE_END = "</DATE>";
- private static final String DATE_NOISE = "day,"; // anything aftre the ','
-
- private static final String SUBJECT = "<SUBJECT>";
- private static final String SUBJECT_END = "</SUBJECT>";
- private static final String HEADLINE = "<HEADLINE>";
- private static final String HEADLINE_END = "</HEADLINE>";
-
- @Override
- public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
- StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
- int mark = 0; // that much is skipped
-
- // date...
- Date date = null;
- String dateStr = extract(docBuf, DATE, DATE_END, -1, null);
- if (dateStr != null) {
- int d2a = dateStr.indexOf(DATE_NOISE);
- if (d2a > 0) {
- dateStr = dateStr.substring(0,d2a+3); // we need the "day" part
- }
- dateStr = stripTags(dateStr,0).toString();
- date = trecSrc.parseDate(dateStr.trim());
- }
-
- // title... first try with SUBJECT, them with HEADLINE
- String title = extract(docBuf, SUBJECT, SUBJECT_END, -1, null);
- if (title==null) {
- title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null);
- }
- if (title!=null) {
- title = stripTags(title,0).toString().trim();
- }
-
- docData.clear();
- docData.setName(name);
- docData.setDate(date);
- docData.setTitle(title);
- docData.setBody(stripTags(docBuf, mark).toString());
- return docData;
- }
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Date;
+
+/**
+ * Parser for the LA Times docs in trec disks 4+5 collection format
+ */
+public class TrecLATimesParser extends TrecDocParser {
+
+ private static final String DATE = "<DATE>";
+ private static final String DATE_END = "</DATE>";
+ private static final String DATE_NOISE = "day,"; // anything after the ',' is dropped from the date text
+
+ private static final String SUBJECT = "<SUBJECT>";
+ private static final String SUBJECT_END = "</SUBJECT>";
+ private static final String HEADLINE = "<HEADLINE>";
+ private static final String HEADLINE_END = "</HEADLINE>";
+
+ @Override
+ public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
+ StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
+ int mark = 0; // number of leading chars left out of the body; stays 0 here, so nothing is skipped
+
+ // date... (-1: no limit on where the tags may be found)
+ Date date = null;
+ String dateStr = extract(docBuf, DATE, DATE_END, -1, null);
+ if (dateStr != null) {
+ int d2a = dateStr.indexOf(DATE_NOISE);
+ if (d2a > 0) {
+ dateStr = dateStr.substring(0,d2a+3); // we need the "day" part: cut right before the ','
+ }
+ dateStr = stripTags(dateStr,0).toString();
+ date = trecSrc.parseDate(dateStr.trim());
+ }
+
+ // title... first try with SUBJECT, then with HEADLINE
+ String title = extract(docBuf, SUBJECT, SUBJECT_END, -1, null);
+ if (title==null) {
+ title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null);
+ }
+ if (title!=null) {
+ title = stripTags(title,0).toString().trim();
+ }
+
+ docData.clear();
+ docData.setName(name);
+ docData.setDate(date);
+ docData.setTitle(title);
+ docData.setBody(stripTags(docBuf, mark).toString());
+ return docData;
+ }
+
+}
Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java?rev=1097216&r1=1097215&r2=1097216&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java Wed Apr 27 19:40:18 2011
@@ -1,33 +1,33 @@
-package org.apache.lucene.benchmark.byTask.feeds;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-/**
- * Parser for trec docs which selects the parser to apply according
- * to the source files path, defaulting to {@link TrecGov2Parser}.
- */
-public class TrecParserByPath extends TrecDocParser {
-
- @Override
- public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
- StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
- return pathType2parser.get(pathType).parse(docData, name, trecSrc, docBuf, pathType);
- }
-
-}
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+/**
+ * Parser for trec docs which selects the parser to apply according
+ * to the source files path, defaulting to {@link TrecGov2Parser}.
+ */
+public class TrecParserByPath extends TrecDocParser {
+
+ @Override
+ public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
+ StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
+ return pathType2parser.get(pathType).parse(docData, name, trecSrc, docBuf, pathType); // NOTE(review): pathType2parser is defined outside this class (presumably in TrecDocParser); the lookup result is not null-checked - confirm every path type is mapped
+ }
+
+}
Modified: lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/alt/AltTestTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/alt/AltTestTask.java?rev=1097216&r1=1097215&r2=1097216&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/alt/AltTestTask.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/alt/AltTestTask.java Wed Apr 27 19:40:18 2011
@@ -1,37 +1,37 @@
-package org.apache.lucene.benchmark.byTask.tasks.alt;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.benchmark.byTask.PerfRunData;
-import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
-
-/**
- * {@link PerfTask} which does nothing, but is in a different package
- */
-public class AltTestTask extends PerfTask {
-
- public AltTestTask(PerfRunData runData) {
- super(runData);
- }
-
- @Override
- public int doLogic() throws Exception {
- return 0;
- }
-
-}
+package org.apache.lucene.benchmark.byTask.tasks.alt;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
+
+/**
+ * {@link PerfTask} which does nothing, but is in a different package
+ */
+public class AltTestTask extends PerfTask {
+
+ public AltTestTask(PerfRunData runData) { // only forwards the run data to PerfTask
+ super(runData);
+ }
+
+ @Override
+ public int doLogic() throws Exception {
+ return 0; // no work is performed; always reports 0
+ }
+
+}
Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/update/UpdateParamsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/update/UpdateParamsTest.java?rev=1097216&r1=1097215&r2=1097216&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/update/UpdateParamsTest.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/update/UpdateParamsTest.java Wed Apr 27 19:40:18 2011
@@ -1 +1,77 @@
-/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update;
import java.util.HashMap;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.UpdateParams;
import org.apache.solr
.core.*;
import org.apache.solr.handler.XmlUpdateRequestHandler;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.util.AbstractSolrTestCase;
public class UpdateParamsTest extends AbstractSolrTestCase {
@Override
public String getSchemaFile() { return "schema.xml"; }
@Override
public String getSolrConfigFile() { return "solrconfig.xml"; }
/**
* Tests that both update.chain and update.processor works
* NOTE: This test will fail when support for update.processor is removed and should then be removed
*/
public void testUpdateProcessorParamDeprecation() throws Exception {
SolrCore core = h.getCore();
XmlUpdateRequestHandler handler = new XmlUpdateRequestHandler();
handler.init( null );
MapSolrParams params = new MapSolrParams( new HashMap<String, String>() );
params.getMap().put(UpdateParams.UPDATE_CHAIN_DEPRECATED, "nonexistant");
// Add a si
ngle document
SolrQueryResponse rsp = new SolrQueryResponse();
SolrQueryRequestBase req = new SolrQueryRequestBase( core, params ) {};
// First check that the old param behaves as it should
try {
handler.handleRequestBody(req, rsp);
assertFalse("Faulty update.processor parameter (deprecated but should work) not causing an error - i.e. it is not detected", true);
} catch (Exception e) {
assertEquals("Got wrong exception while testing update.chain", e.getMessage(), "unknown UpdateRequestProcessorChain: nonexistant");
}
// Then check that the new param behaves correctly
params.getMap().remove(UpdateParams.UPDATE_CHAIN_DEPRECATED);
params.getMap().put(UpdateParams.UPDATE_CHAIN, "nonexistant");
req.setParams(params);
try {
handler.handleRequestBody(req, rsp);
assertFalse("Faulty update.chain parameter not causing an error - i.e. it is not detected", true);
} catch (Exception e) {
assertEqu
als("Got wrong exception while testing update.chain", e.getMessage(), "unknown UpdateRequestProcessorChain: nonexistant");
}
}
}
\ No newline at end of file
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update;
+
+import java.util.HashMap;
+
+import org.apache.solr.common.params.MapSolrParams;
+import org.apache.solr.common.params.UpdateParams;
+import org.apache.solr.core.*;
+import org.apache.solr.handler.XmlUpdateRequestHandler;
+import org.apache.solr.request.SolrQueryRequestBase;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.util.AbstractSolrTestCase;
+
+
+
+public class UpdateParamsTest extends AbstractSolrTestCase {
+
+ @Override
+ public String getSchemaFile() { return "schema.xml"; }
+ @Override
+ public String getSolrConfigFile() { return "solrconfig.xml"; }
+
+ /**
+ * Tests that both update.chain and update.processor works
+ * NOTE: This test will fail when support for update.processor is removed and should then be removed
+ */
+ public void testUpdateProcessorParamDeprecation() throws Exception {
+ SolrCore core = h.getCore();
+
+ XmlUpdateRequestHandler handler = new XmlUpdateRequestHandler();
+ handler.init( null );
+
+ MapSolrParams params = new MapSolrParams( new HashMap<String, String>() );
+ params.getMap().put(UpdateParams.UPDATE_CHAIN_DEPRECATED, "nonexistant");
+
+ // Add a single document
+ SolrQueryResponse rsp = new SolrQueryResponse();
+ SolrQueryRequestBase req = new SolrQueryRequestBase( core, params ) {};
+
+ // First check that the old param behaves as it should
+ try {
+ handler.handleRequestBody(req, rsp);
+ assertFalse("Faulty update.processor parameter (deprecated but should work) not causing an error - i.e. it is not detected", true);
+ } catch (Exception e) {
+ assertEquals("Got wrong exception while testing update.chain", e.getMessage(), "unknown UpdateRequestProcessorChain: nonexistant");
+ }
+
+ // Then check that the new param behaves correctly
+ params.getMap().remove(UpdateParams.UPDATE_CHAIN_DEPRECATED);
+ params.getMap().put(UpdateParams.UPDATE_CHAIN, "nonexistant");
+ req.setParams(params);
+ try {
+ handler.handleRequestBody(req, rsp);
+ assertFalse("Faulty update.chain parameter not causing an error - i.e. it is not detected", true);
+ } catch (Exception e) {
+ assertEquals("Got wrong exception while testing update.chain", e.getMessage(), "unknown UpdateRequestProcessorChain: nonexistant");
+ }
+
+ }
+
+}