You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2014/03/26 13:07:23 UTC
svn commit: r1581801 - in /jena/trunk/jena-text/src:
main/java/org/apache/jena/query/text/assembler/
test/java/org/apache/jena/query/text/
Author: andy
Date: Wed Mar 26 12:07:22 2014
New Revision: 1581801
URL: http://svn.apache.org/r1581801
Log:
JENA-654 : Configurable Lucene Analyzer for index
Added:
jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/KeywordAnalyzerAssembler.java
jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/SimpleAnalyzerAssembler.java
jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/StandardAnalyzerAssembler.java
jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithTextIndexBase.java
jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java
jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithSimpleAnalyzer.java
jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithStandardAnalyzer.java
Added: jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/KeywordAnalyzerAssembler.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/KeywordAnalyzerAssembler.java?rev=1581801&view=auto
==============================================================================
--- jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/KeywordAnalyzerAssembler.java (added)
+++ jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/KeywordAnalyzerAssembler.java Wed Mar 26 12:07:22 2014
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.assembler ;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
+
+import com.hp.hpl.jena.assembler.Assembler;
+import com.hp.hpl.jena.assembler.Mode;
+import com.hp.hpl.jena.assembler.assemblers.AssemblerBase;
+import com.hp.hpl.jena.rdf.model.Resource;
+
+/**
+ * Assembler to create keyword analyzers.
+ */
+public class KeywordAnalyzerAssembler extends AssemblerBase {
+ /*
+ text:map (
+ [ text:field "text" ;
+ text:predicate rdfs:label;
+ text:analyzer [
+ a lucene:KeywordAnalyzer ; ]
+ ]
+ .
+ */
+
+ @Override
+ public Analyzer open(Assembler a, Resource root, Mode mode) {
+ return new KeywordAnalyzer();
+ }
+}
Added: jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/SimpleAnalyzerAssembler.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/SimpleAnalyzerAssembler.java?rev=1581801&view=auto
==============================================================================
--- jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/SimpleAnalyzerAssembler.java (added)
+++ jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/SimpleAnalyzerAssembler.java Wed Mar 26 12:07:22 2014
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.assembler ;
+
+import org.apache.jena.query.text.TextIndexLucene;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.core.SimpleAnalyzer;
+
+import com.hp.hpl.jena.assembler.Assembler;
+import com.hp.hpl.jena.assembler.Mode;
+import com.hp.hpl.jena.assembler.assemblers.AssemblerBase;
+import com.hp.hpl.jena.rdf.model.Resource;
+
+/**
+ * Assembler to create simple analyzers.
+ */
+public class SimpleAnalyzerAssembler extends AssemblerBase {
+ /*
+ text:map (
+ [ text:field "text" ;
+ text:predicate rdfs:label;
+ text:analyzer [
+ a lucene:SimpleAnalyzer ;
+ ]
+ ]
+ .
+ */
+
+ @Override
+ public Analyzer open(Assembler a, Resource root, Mode mode) {
+ return new SimpleAnalyzer(TextIndexLucene.VER);
+ }
+}
Added: jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/StandardAnalyzerAssembler.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/StandardAnalyzerAssembler.java?rev=1581801&view=auto
==============================================================================
--- jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/StandardAnalyzerAssembler.java (added)
+++ jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/StandardAnalyzerAssembler.java Wed Mar 26 12:07:22 2014
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.assembler ;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.jena.query.text.TextIndexException;
+import org.apache.jena.query.text.TextIndexLucene;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.util.CharArraySet;
+
+import com.hp.hpl.jena.assembler.Assembler;
+import com.hp.hpl.jena.assembler.Mode;
+import com.hp.hpl.jena.assembler.assemblers.AssemblerBase;
+import com.hp.hpl.jena.rdf.model.Literal;
+import com.hp.hpl.jena.rdf.model.RDFNode;
+import com.hp.hpl.jena.rdf.model.Resource;
+import com.hp.hpl.jena.rdf.model.Statement;
+import com.hp.hpl.jena.vocabulary.RDF;
+
+/**
+ * Assembler to create standard analyzers with keyword list.
+ */
+public class StandardAnalyzerAssembler extends AssemblerBase {
+ /*
+ text:map (
+ [ text:field "text" ;
+ text:predicate rdfs:label;
+ text:analyzer [
+ a lucene:StandardAnalyzer ;
+ text:stopWords ("foo" "bar" "baz") # optional
+ ]
+ ]
+ .
+ */
+
+ @Override
+ public Analyzer open(Assembler a, Resource root, Mode mode) {
+ if (root.hasProperty(TextVocab.pStopWords)) {
+ return analyzerWithStopWords(root);
+ } else {
+ return new StandardAnalyzer(TextIndexLucene.VER);
+ }
+ }
+
+ private Analyzer analyzerWithStopWords(Resource root) {
+ RDFNode node = root.getProperty(TextVocab.pStopWords).getObject();
+ if (! node.isResource()) {
+ throw new TextIndexException("text:stopWords property takes a list as a value : " + node);
+ }
+ CharArraySet stopWords = toCharArraySet((Resource) node);
+ return new StandardAnalyzer(TextIndexLucene.VER, stopWords);
+ }
+
+ private CharArraySet toCharArraySet(Resource list) {
+ return new CharArraySet(TextIndexLucene.VER, toList(list), false);
+ }
+
+ private List<String> toList(Resource list) {
+ List<String> result = new ArrayList<String>();
+ Resource current = list;
+ while (current != null && ! current.equals(RDF.nil)){
+ Statement stmt = current.getProperty(RDF.first);
+ if (stmt == null) {
+ throw new TextIndexException("stop word list not well formed");
+ }
+ RDFNode node = stmt.getObject();
+ if (! node.isLiteral()) {
+ throw new TextIndexException("stop word is not a literal : " + node);
+ }
+ result.add(((Literal)node).getLexicalForm());
+ stmt = current.getProperty(RDF.rest);
+ if (stmt == null) {
+ throw new TextIndexException("stop word list not terminated by rdf:nil");
+ }
+ node = stmt.getObject();
+ if (! node.isResource()) {
+ throw new TextIndexException("stop word list node is not a resource : " + node);
+ }
+ current = (Resource) node;
+ }
+ return result;
+ }
+}
Added: jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithTextIndexBase.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithTextIndexBase.java?rev=1581801&view=auto
==============================================================================
--- jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithTextIndexBase.java (added)
+++ jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithTextIndexBase.java Wed Mar 26 12:07:22 2014
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Set;
+
+import org.apache.jena.atlas.lib.StrUtils;
+
+import com.hp.hpl.jena.query.Dataset;
+import com.hp.hpl.jena.query.Query;
+import com.hp.hpl.jena.query.QueryExecution;
+import com.hp.hpl.jena.query.QueryExecutionFactory;
+import com.hp.hpl.jena.query.QueryFactory;
+import com.hp.hpl.jena.query.ReadWrite;
+import com.hp.hpl.jena.query.ResultSet;
+import com.hp.hpl.jena.rdf.model.Model;
+
+/*
+ * This abstract class defines a collection of test methods for testing
+ * test searches. Its subclasses create a dataset using the index to
+ * to be tested and then call the test methods in this class to run
+ * the actual tests.
+ */
+public abstract class AbstractTestDatasetWithTextIndexBase {
+ protected static final String RESOURCE_BASE = "http://example.org/data/resource/";
+ protected static Dataset dataset;
+ protected static final String QUERY_PROLOG =
+ StrUtils.strjoinNL(
+ "PREFIX text: <http://jena.apache.org/text#>",
+ "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>"
+ );
+
+ protected static final String TURTLE_PROLOG =
+ StrUtils.strjoinNL(
+ "@prefix text: <http://jena.apache.org/text#> .",
+ "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ."
+ );
+
+ protected void doTestSearch(String turtle, String queryString, Set<String> expectedEntityURIs) {
+ doTestSearch("", turtle, queryString, expectedEntityURIs);
+ }
+
+ protected void doTestSearch(String label, String turtle, String queryString, Set<String> expectedEntityURIs) {
+ doTestSearch(label, turtle, queryString, expectedEntityURIs, expectedEntityURIs.size());
+ }
+
+ protected void doTestSearch(String label, String turtle, String queryString, Set<String> expectedEntityURIs, int expectedNumResults) {
+ Model model = dataset.getDefaultModel();
+ Reader reader = new StringReader(turtle);
+ dataset.begin(ReadWrite.WRITE);
+ model.read(reader, "", "TURTLE");
+ dataset.commit();
+ doTestQuery(dataset, label, queryString, expectedEntityURIs, expectedNumResults);
+ }
+
+ public static void doTestQuery(Dataset dataset, String label, String queryString, Set<String> expectedEntityURIs, int expectedNumResults) {
+ Query query = QueryFactory.create(queryString) ;
+ QueryExecution qexec = QueryExecutionFactory.create(query, dataset) ;
+ try {
+ dataset.begin(ReadWrite.READ);
+ ResultSet results = qexec.execSelect() ;
+
+ assertEquals(label, expectedNumResults > 0, results.hasNext());
+ int count;
+ for (count=0; results.hasNext(); count++) {
+ String entityURI = results.next().getResource("s").getURI();
+ assertTrue(label + ": unexpected result: " + entityURI, expectedEntityURIs.contains(entityURI));
+ }
+ assertEquals(label, expectedNumResults, count);
+ } finally { qexec.close() ; dataset.end() ; }
+ }
+}
Added: jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java?rev=1581801&view=auto
==============================================================================
--- jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java (added)
+++ jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java Wed Mar 26 12:07:22 2014
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import java.io.File;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.query.text.assembler.TextAssembler;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import com.hp.hpl.jena.assembler.Assembler;
+import com.hp.hpl.jena.query.Dataset;
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ModelFactory;
+import com.hp.hpl.jena.rdf.model.Resource;
+
+/**
+ * This class defines a setup configuration for a dataset that uses a keyword analyzer with a Lucene index.
+ */
+public class TestDatasetWithKeywordAnalyzer extends AbstractTestDatasetWithTextIndexBase {
+ private static final String INDEX_PATH = "target/test/TestDatasetWithLuceneIndex";
+ private static final File indexDir = new File(INDEX_PATH);
+
+ private static final String SPEC_BASE = "http://example.org/spec#";
+ private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";
+ private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;
+ private static final String SPEC;
+ static {
+ SPEC = StrUtils.strjoinNL(
+ "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
+ "prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> ",
+ "prefix tdb: <http://jena.hpl.hp.com/2008/tdb#>",
+ "prefix text: <http://jena.apache.org/text#>",
+ "prefix : <" + SPEC_BASE + ">",
+ "",
+ "[] ja:loadClass \"org.apache.jena.query.text.TextQuery\" .",
+ "text:TextDataset rdfs:subClassOf ja:RDFDataset .",
+ "text:TextIndexLucene rdfs:subClassOf text:TextIndex .",
+
+ ":" + SPEC_ROOT_LOCAL,
+ " a text:TextDataset ;",
+ " text:dataset :dataset ;",
+ " text:index :indexLucene ;",
+ " .",
+ "",
+ ":dataset",
+ " a ja:RDFDataset ;",
+ " ja:defaultGraph :graph ;",
+ ".",
+ ":graph",
+ " a ja:MemoryModel ;",
+ ".",
+ "",
+ ":indexLucene",
+ " a text:TextIndexLucene ;",
+ " text:directory <file:" + INDEX_PATH + "> ;",
+ " text:entityMap :entMap ;",
+ " .",
+ "",
+ ":entMap",
+ " a text:EntityMap ;",
+ " text:entityField \"uri\" ;",
+ " text:defaultField \"label\" ;",
+ " text:map (",
+ " [ text:field \"label\" ; ",
+ " text:predicate rdfs:label ;",
+ " text:analyzer [ a text:KeywordAnalyzer ]",
+ " ]",
+ " [ text:field \"comment\" ; text:predicate rdfs:comment ]",
+ " ) ."
+ );
+ }
+
+ public static void init() {
+ Reader reader = new StringReader(SPEC);
+ Model specModel = ModelFactory.createDefaultModel();
+ specModel.read(reader, "", "TURTLE");
+ TextAssembler.init();
+ deleteOldFiles();
+ indexDir.mkdirs();
+ Resource root = specModel.getResource(SPEC_ROOT_URI);
+ dataset = (Dataset) Assembler.general.open(root);
+ }
+
+
+ public static void deleteOldFiles() {
+ if (indexDir.exists()) TextSearchUtil.emptyAndDeleteDirectory(indexDir);
+ }
+
+ @BeforeClass public static void beforeClass() {
+ init();
+ }
+
+ @AfterClass public static void afterClass() {
+ deleteOldFiles();
+ }
+
+ @Test
+ public void testKeywordAnalyzerDoesNotSplitTokensAtSpace() {
+ final String testName = "testKeywordAnalyzerDoesNotSplitTokensAtSpace";
+ final String turtle = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + testName + ">",
+ " rdfs:label 'EC1V 9BE'",
+ "."
+ );
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s",
+ "WHERE {",
+ " ?s text:query ( rdfs:label 'EC1V' 10 ) .",
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<String>() ;
+ doTestSearch(turtle, queryString, expectedURIs);
+ }
+
+ @Test
+ public void testKeywordAnalyzerMatchesWholeField() {
+ final String testName = "testKeywordAnalyzerMatchesWholeField";
+ final String turtle = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + testName + ">",
+ " rdfs:label 'EC2V 9BE'",
+ "."
+ );
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s",
+ "WHERE {",
+ " ?s text:query ( rdfs:label '\"EC2V 9BE\"' 10 ) .",
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<String>() ;
+ expectedURIs.addAll( Arrays.asList(RESOURCE_BASE + testName)) ;
+ doTestSearch(turtle, queryString, expectedURIs);
+ }
+}
Added: jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithSimpleAnalyzer.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithSimpleAnalyzer.java?rev=1581801&view=auto
==============================================================================
--- jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithSimpleAnalyzer.java (added)
+++ jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithSimpleAnalyzer.java Wed Mar 26 12:07:22 2014
@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import java.io.File;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.query.text.assembler.TextAssembler;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import com.hp.hpl.jena.assembler.Assembler;
+import com.hp.hpl.jena.query.Dataset;
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ModelFactory;
+import com.hp.hpl.jena.rdf.model.Resource;
+
+/**
+ * This class defines a setup configuration for a dataset that uses a simple analyzer with a Lucene index.
+ */
+public class TestDatasetWithSimpleAnalyzer extends AbstractTestDatasetWithTextIndexBase {
+ private static final String INDEX_PATH = "target/test/TestDatasetWithLuceneIndex";
+ private static final File indexDir = new File(INDEX_PATH);
+
+ private static final String SPEC_BASE = "http://example.org/spec#";
+ private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";
+ private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;
+ private static final String SPEC;
+ static {
+ SPEC = StrUtils.strjoinNL(
+ "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
+ "prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> ",
+ "prefix tdb: <http://jena.hpl.hp.com/2008/tdb#>",
+ "prefix text: <http://jena.apache.org/text#>",
+ "prefix : <" + SPEC_BASE + ">",
+ "",
+ "[] ja:loadClass \"org.apache.jena.query.text.TextQuery\" .",
+ "text:TextDataset rdfs:subClassOf ja:RDFDataset .",
+ "text:TextIndexLucene rdfs:subClassOf text:TextIndex .",
+
+ ":" + SPEC_ROOT_LOCAL,
+ " a text:TextDataset ;",
+ " text:dataset :dataset ;",
+ " text:index :indexLucene ;",
+ " .",
+ "",
+ ":dataset",
+ " a ja:RDFDataset ;",
+ " ja:defaultGraph :graph ;",
+ ".",
+ ":graph",
+ " a ja:MemoryModel ;",
+ ".",
+ "",
+ ":indexLucene",
+ " a text:TextIndexLucene ;",
+ " text:directory <file:" + INDEX_PATH + "> ;",
+ " text:entityMap :entMap ;",
+ " .",
+ "",
+ ":entMap",
+ " a text:EntityMap ;",
+ " text:entityField \"uri\" ;",
+ " text:defaultField \"label\" ;",
+ " text:map (",
+ " [ text:field \"label\" ; ",
+ " text:predicate rdfs:label ;",
+ " text:analyzer [ a text:SimpleAnalyzer ]",
+ " ]",
+ " [ text:field \"comment\" ; text:predicate rdfs:comment ]",
+ " ) ."
+ );
+ }
+
+ public static void init() {
+ Reader reader = new StringReader(SPEC);
+ Model specModel = ModelFactory.createDefaultModel();
+ specModel.read(reader, "", "TURTLE");
+ TextAssembler.init();
+ deleteOldFiles();
+ indexDir.mkdirs();
+ Resource root = specModel.getResource(SPEC_ROOT_URI);
+ dataset = (Dataset) Assembler.general.open(root);
+ }
+
+
+ public static void deleteOldFiles() {
+ if (indexDir.exists()) TextSearchUtil.emptyAndDeleteDirectory(indexDir);
+ }
+
+ @BeforeClass public static void beforeClass() {
+ init();
+ }
+
+ @AfterClass public static void afterClass() {
+ deleteOldFiles();
+ }
+
+ @Test
+ public void testSimpleAnalyzer() {
+ final String turtle = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "testSimpleAnalyzer>",
+ " rdfs:label 'bar the barfoo foo'",
+ "."
+ );
+ // the simple analyzer should not filter out the 'the' word
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s",
+ "WHERE {",
+ " ?s text:query ( rdfs:label 'the' 10 ) .",
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<String>() ;
+ expectedURIs.addAll( Arrays.asList("http://example.org/data/resource/testSimpleAnalyzer")) ;
+ doTestSearch(turtle, queryString, expectedURIs);
+ }
+}
Added: jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithStandardAnalyzer.java?rev=1581801&view=auto
==============================================================================
--- jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithStandardAnalyzer.java (added)
+++ jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithStandardAnalyzer.java Wed Mar 26 12:07:22 2014
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import java.io.File;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.query.text.assembler.TextAssembler;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import com.hp.hpl.jena.assembler.Assembler;
+import com.hp.hpl.jena.query.Dataset;
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ModelFactory;
+import com.hp.hpl.jena.rdf.model.Resource;
+
+/**
+ * This class defines a setup configuration for a dataset that uses a standard analyzer with a Lucene index.
+ */
+public class TestDatasetWithStandardAnalyzer extends AbstractTestDatasetWithTextIndexBase {
+ private static final String INDEX_PATH = "target/test/TestDatasetWithLuceneIndex";
+ private static final File indexDir = new File(INDEX_PATH);
+
+ private static final String SPEC_BASE = "http://example.org/spec#";
+ private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";
+ private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;
+ private static final String SPEC;
+ static {
+ SPEC = StrUtils.strjoinNL(
+ "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
+ "prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> ",
+ "prefix tdb: <http://jena.hpl.hp.com/2008/tdb#>",
+ "prefix text: <http://jena.apache.org/text#>",
+ "prefix : <" + SPEC_BASE + ">",
+ "",
+ "[] ja:loadClass \"org.apache.jena.query.text.TextQuery\" .",
+ "text:TextDataset rdfs:subClassOf ja:RDFDataset .",
+ "text:TextIndexLucene rdfs:subClassOf text:TextIndex .",
+
+ ":" + SPEC_ROOT_LOCAL,
+ " a text:TextDataset ;",
+ " text:dataset :dataset ;",
+ " text:index :indexLucene ;",
+ " .",
+ "",
+ ":dataset",
+ " a ja:RDFDataset ;",
+ " ja:defaultGraph :graph ;",
+ ".",
+ ":graph",
+ " a ja:MemoryModel ;",
+ ".",
+ "",
+ ":indexLucene",
+ " a text:TextIndexLucene ;",
+ " text:directory <file:" + INDEX_PATH + "> ;",
+ " text:entityMap :entMap ;",
+ " .",
+ "",
+ ":entMap",
+ " a text:EntityMap ;",
+ " text:entityField \"uri\" ;",
+ " text:defaultField \"label\" ;",
+ " text:map (",
+ " [ text:field \"label\" ; ",
+ " text:predicate rdfs:label ;",
+ " text:analyzer [ a text:StandardAnalyzer ; text:stopWords ( 'foo' 'bar' ) ]",
+ " ]",
+ " [ text:field \"comment\" ; text:predicate rdfs:comment ]",
+ " ) ."
+ );
+ }
+
+ public static void init() {
+ Reader reader = new StringReader(SPEC);
+ Model specModel = ModelFactory.createDefaultModel();
+ specModel.read(reader, "", "TURTLE");
+ TextAssembler.init();
+ deleteOldFiles();
+ indexDir.mkdirs();
+ Resource root = specModel.getResource(SPEC_ROOT_URI);
+ dataset = (Dataset) Assembler.general.open(root);
+ }
+
+
+ public static void deleteOldFiles() {
+ if (indexDir.exists()) TextSearchUtil.emptyAndDeleteDirectory(indexDir);
+ }
+
+ @BeforeClass public static void beforeClass() {
+ init();
+ }
+
+ @AfterClass public static void afterClass() {
+ deleteOldFiles();
+ }
+
+ @Test
+ public void testStandardAnalyzerWithSpecifiedStopWords() {
+ final String turtle = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "testOneSimpleResult>",
+ " rdfs:label 'bar the barfoo foo'",
+ "."
+ );
+ // the standard analyzer not to have 'the' as a stop word
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s",
+ "WHERE {",
+ " ?s text:query ( rdfs:label 'the' 10 ) .",
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<String>() ;
+ expectedURIs.addAll( Arrays.asList("http://example.org/data/resource/testOneSimpleResult")) ;
+ doTestSearch(turtle, queryString, expectedURIs);
+ }
+
+ @Test
+ public void testStandardAnalyzerIgnoresSpecifiedStopWords() {
+ final String turtle = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + "testOneSimpleResult>",
+ " rdfs:label 'bar the barfoo foo'",
+ "."
+ );
+ // the standard analyzer not to have 'the' as a stop word
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s",
+ "WHERE {",
+ " ?s text:query ( rdfs:label 'foo' 10 ) .",
+ "}"
+ );
+ Set<String> expectedURIs = new HashSet<String>() ;
+ doTestSearch(turtle, queryString, expectedURIs);
+ }
+}