You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by os...@apache.org on 2016/04/06 11:05:34 UTC
[1/2] jena git commit: simplify parseQuery and preParseQuery: get rid
of primaryField argument as it is always the same
Repository: jena
Updated Branches:
refs/heads/master 9c4039826 -> 0bc25005a
simplify parseQuery and preParseQuery: get rid of primaryField argument as it is always the same
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/e622fd1f
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/e622fd1f
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/e622fd1f
Branch: refs/heads/master
Commit: e622fd1fec5a164300732aa0de4a3ec2838b7c4b
Parents: 9c40398
Author: Osma Suominen <os...@apache.org>
Authored: Tue Mar 29 17:23:27 2016 +0300
Committer: Osma Suominen <os...@helsinki.fi>
Committed: Wed Apr 6 12:02:04 2016 +0300
----------------------------------------------------------------------
.../org/apache/jena/query/text/TextIndexLucene.java | 12 ++++++------
.../jena/query/text/TextIndexLuceneMultilingual.java | 4 ++--
2 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/e622fd1f/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index 65dc881..38a479a 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -289,21 +289,21 @@ public class TextIndexLucene implements TextIndex {
}
}
- private static Query parseQuery(String queryString, String primaryField, Analyzer analyzer) throws ParseException {
- QueryParser queryParser = new QueryParser(VER, primaryField, analyzer) ;
+ private Query parseQuery(String queryString, Analyzer analyzer) throws ParseException {
+ QueryParser queryParser = new QueryParser(VER, docDef.getPrimaryField(), analyzer) ;
queryParser.setAllowLeadingWildcard(true) ;
Query query = queryParser.parse(queryString) ;
return query ;
}
- protected Query preParseQuery(String queryString, String primaryField, Analyzer analyzer) throws ParseException {
- return parseQuery(queryString, primaryField, analyzer);
+ protected Query preParseQuery(String queryString, Analyzer analyzer) throws ParseException {
+ return parseQuery(queryString, analyzer);
}
private List<Map<String, Node>> get$(IndexReader indexReader, String uri) throws ParseException, IOException {
String escaped = QueryParserBase.escape(uri) ;
String qs = docDef.getEntityField() + ":" + escaped ;
- Query query = preParseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
+ Query query = preParseQuery(qs, queryAnalyzer) ;
IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
ScoreDoc[] sDocs = indexSearcher.search(query, 1).scoreDocs ;
List<Map<String, Node>> records = new ArrayList<Map<String, Node>>() ;
@@ -351,7 +351,7 @@ public class TextIndexLucene implements TextIndex {
private List<TextHit> query$(IndexReader indexReader, Node property, String qs, int limit) throws ParseException, IOException {
IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
- Query query = preParseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
+ Query query = preParseQuery(qs, queryAnalyzer) ;
if ( limit <= 0 )
limit = MAX_N ;
ScoreDoc[] sDocs = indexSearcher.search(query, limit).scoreDocs ;
http://git-wip-us.apache.org/repos/asf/jena/blob/e622fd1f/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
index ce20294..ec7a8bb 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
@@ -64,12 +64,12 @@ public class TextIndexLuceneMultilingual extends TextIndexLucene {
}
@Override
- protected Query preParseQuery(String queryString, String primaryField, Analyzer analyzer) throws ParseException {
+ protected Query preParseQuery(String queryString, Analyzer analyzer) throws ParseException {
if (queryString.contains(getDocDef().getLangField() + ":")) {
String lang = queryString.substring(queryString.lastIndexOf(":") + 1);
if (!"*".equals(lang))
analyzer = Util.getLocalizedAnalyzer(lang);
}
- return super.preParseQuery(queryString, primaryField, analyzer);
+ return super.preParseQuery(queryString, analyzer);
}
}
[2/2] jena git commit: JENA-1134: basic support for
AnalyzingQueryParser
Posted by os...@apache.org.
JENA-1134: basic support for AnalyzingQueryParser
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/0bc25005
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/0bc25005
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/0bc25005
Branch: refs/heads/master
Commit: 0bc25005a35d72f5053be0aa1602e40e09fb9bf3
Parents: e622fd1
Author: Osma Suominen <os...@apache.org>
Authored: Wed Mar 30 11:09:02 2016 +0300
Committer: Osma Suominen <os...@helsinki.fi>
Committed: Wed Apr 6 12:03:04 2016 +0300
----------------------------------------------------------------------
.../apache/jena/query/text/TextIndexConfig.java | 9 +++
.../apache/jena/query/text/TextIndexLucene.java | 17 +++++-
.../assembler/TextIndexLuceneAssembler.java | 12 ++++
.../jena/query/text/assembler/TextVocab.java | 1 +
.../org/apache/jena/query/text/TS_Text.java | 1 +
.../TestDatasetWithAnalyzingQueryParser.java | 64 ++++++++++++++++++++
.../text/TestDatasetWithKeywordAnalyzer.java | 10 ++-
7 files changed, 110 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/0bc25005/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
index 98e82f6..932a645 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
@@ -25,6 +25,7 @@ public class TextIndexConfig {
EntityDefinition entDef;
Analyzer analyzer;
Analyzer queryAnalyzer;
+ String queryParser;
boolean multilingualSupport;
boolean valueStored;
@@ -51,6 +52,14 @@ public class TextIndexConfig {
public void setQueryAnalyzer(Analyzer queryAnalyzer) {
this.queryAnalyzer = queryAnalyzer;
}
+
+ public String getQueryParser() {
+ return ((queryParser != null) ? queryParser : "QueryParser");
+ }
+
+ public void setQueryParser(String queryParser) {
+ this.queryParser = queryParser;
+ }
public boolean isMultilingualSupport() {
return multilingualSupport;
http://git-wip-us.apache.org/repos/asf/jena/blob/0bc25005/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index 38a479a..f2e3786 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -34,6 +34,7 @@ import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper ;
import org.apache.lucene.analysis.standard.StandardAnalyzer ;
import org.apache.lucene.document.* ;
import org.apache.lucene.index.* ;
+import org.apache.lucene.queryparser.analyzing.AnalyzingQueryParser ;
import org.apache.lucene.queryparser.classic.ParseException ;
import org.apache.lucene.queryparser.classic.QueryParser ;
import org.apache.lucene.queryparser.classic.QueryParserBase ;
@@ -67,6 +68,7 @@ public class TextIndexLucene implements TextIndex {
private final Directory directory ;
private final Analyzer analyzer ;
private final Analyzer queryAnalyzer ;
+ private final String queryParserType ;
private final FieldType ftText ;
// The IndexWriter can't be final because we may have to recreate it if rollback() is called.
@@ -104,6 +106,7 @@ public class TextIndexLucene implements TextIndex {
this.analyzer = new PerFieldAnalyzerWrapper(
(null != config.getAnalyzer()) ? config.getAnalyzer() : new StandardAnalyzer(VER), analyzerPerField) ;
this.queryAnalyzer = (null != config.getQueryAnalyzer()) ? config.getQueryAnalyzer() : this.analyzer ;
+ this.queryParserType = config.getQueryParser() ;
this.ftText = config.isValueStored() ? TextField.TYPE_STORED : TextField.TYPE_NOT_STORED ;
if (config.isValueStored() && docDef.getLangField() == null)
log.warn("Values stored but langField not set. Returned values will not have language tag or datatype.");
@@ -288,9 +291,21 @@ public class TextIndexLucene implements TextIndex {
throw new TextIndexException(ex) ;
}
}
+
+ private QueryParser getQueryParser(Analyzer analyzer) {
+ switch(queryParserType) {
+ case "QueryParser":
+ return new QueryParser(VER, docDef.getPrimaryField(), analyzer) ;
+ case "AnalyzingQueryParser":
+ return new AnalyzingQueryParser(VER, docDef.getPrimaryField(), analyzer) ;
+ default:
+ log.warn("Unknown query parser type '" + queryParserType + "'. Defaulting to standard QueryParser") ;
+ return new QueryParser(VER, docDef.getPrimaryField(), analyzer) ;
+ }
+ }
private Query parseQuery(String queryString, Analyzer analyzer) throws ParseException {
- QueryParser queryParser = new QueryParser(VER, docDef.getPrimaryField(), analyzer) ;
+ QueryParser queryParser = getQueryParser(analyzer) ;
queryParser.setAllowLeadingWildcard(true) ;
Query query = queryParser.parse(queryString) ;
return query ;
http://git-wip-us.apache.org/repos/asf/jena/blob/0bc25005/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index 98ffec7..1af7e9d 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -94,6 +94,17 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
Resource analyzerResource = (Resource) qaNode;
queryAnalyzer = (Analyzer) a.open(analyzerResource);
}
+
+ String queryParser = null;
+ Statement queryParserStatement = root.getProperty(pQueryParser);
+ if (null != queryParserStatement) {
+ RDFNode qpNode = queryParserStatement.getObject();
+ if (! qpNode.isResource()) {
+ throw new TextIndexException("Text query parser property is not a resource : " + qpNode);
+ }
+ Resource parserResource = (Resource) qpNode;
+ queryParser = parserResource.getLocalName();
+ }
boolean isMultilingualSupport = false;
Statement mlSupportStatement = root.getProperty(pMultilingualSupport);
@@ -120,6 +131,7 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
TextIndexConfig config = new TextIndexConfig(docDef);
config.setAnalyzer(analyzer);
config.setQueryAnalyzer(queryAnalyzer);
+ config.setQueryParser(queryParser);
config.setMultilingualSupport(isMultilingualSupport);
config.setValueStored(storeValues);
http://git-wip-us.apache.org/repos/asf/jena/blob/0bc25005/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index 705b565..d60aafb 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -42,6 +42,7 @@ public class TextVocab
public static final Property pMultilingualSupport = Vocab.property(NS, "multilingualSupport") ;
public static final Property pStoreValues = Vocab.property(NS, "storeValues") ;
public static final Property pQueryAnalyzer = Vocab.property(NS, "queryAnalyzer") ;
+ public static final Property pQueryParser = Vocab.property(NS, "queryParser") ;
public static final Property pEntityMap = Vocab.property(NS, "entityMap") ;
public static final Property pTokenizer = Vocab.property(NS, "tokenizer") ;
public static final Property pFilters = Vocab.property(NS, "filters") ;
http://git-wip-us.apache.org/repos/asf/jena/blob/0bc25005/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
index 6e0be2c..2ee1dd3 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
@@ -51,6 +51,7 @@ import org.junit.runners.Suite.SuiteClasses ;
, TestLuceneWithMultipleThreads.class
, TestDatasetWithLocalizedAnalyzer.class
, TestDatasetWithConfigurableAnalyzer.class
+ , TestDatasetWithAnalyzingQueryParser.class
})
public class TS_Text
http://git-wip-us.apache.org/repos/asf/jena/blob/0bc25005/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithAnalyzingQueryParser.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithAnalyzingQueryParser.java b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithAnalyzingQueryParser.java
new file mode 100644
index 0000000..1b77311
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithAnalyzingQueryParser.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import java.util.Set ;
+
+import org.apache.jena.atlas.lib.StrUtils ;
+import org.apache.jena.ext.com.google.common.collect.Sets ;
+import org.junit.Before ;
+import org.junit.Test ;
+
+/**
+ * This class defines a setup configuration for a dataset that uses an ASCII folding lowercase keyword analyzer and the AnalyzingQueryParser query parser with a Lucene index.
+ */
+public class TestDatasetWithAnalyzingQueryParser extends TestDatasetWithConfigurableAnalyzer {
+ @Override
+ @Before
+ public void before() {
+ init(StrUtils.strjoinNL(
+ "text:ConfigurableAnalyzer ;",
+ "text:tokenizer text:KeywordTokenizer ;",
+ "text:filters (text:ASCIIFoldingFilter text:LowerCaseFilter)"
+ ), "text:AnalyzingQueryParser");
+ }
+
+ @Test
+ public void testAnalyzingQueryParserAnalyzesWildcards() {
+ final String testName = "testAnalyzingQueryParserAnalyzesWildcards";
+ final String turtle = StrUtils.strjoinNL(
+ TURTLE_PROLOG,
+ "<" + RESOURCE_BASE + testName + ">",
+ " rdfs:label 'éducation'@fr",
+ ".",
+ "<" + RESOURCE_BASE + "irrelevant>",
+ " rdfs:label 'déjà vu'@fr",
+ "."
+ );
+ String queryString = StrUtils.strjoinNL(
+ QUERY_PROLOG,
+ "SELECT ?s",
+ "WHERE {",
+ " ?s text:query ( rdfs:label 'édu*' 10 ) .",
+ "}"
+ );
+ Set<String> expectedURIs = Sets.newHashSet(RESOURCE_BASE + testName);
+ doTestSearch(turtle, queryString, expectedURIs);
+ }
+}
http://git-wip-us.apache.org/repos/asf/jena/blob/0bc25005/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java
index 2ff039d..375c7e0 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java
@@ -44,7 +44,7 @@ public class TestDatasetWithKeywordAnalyzer extends AbstractTestDatasetWithTextI
private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";
private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;
- private static String makeSpec(String analyzer) {
+ private static String makeSpec(String analyzer, String parser) {
return StrUtils.strjoinNL(
"prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
"prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> ",
@@ -73,6 +73,7 @@ public class TestDatasetWithKeywordAnalyzer extends AbstractTestDatasetWithTextI
":indexLucene",
" a text:TextIndexLucene ;",
" text:directory \"mem\" ;",
+ " text:queryParser " + parser + ";",
" text:entityMap :entMap ;",
" .",
"",
@@ -90,8 +91,8 @@ public class TestDatasetWithKeywordAnalyzer extends AbstractTestDatasetWithTextI
);
}
- public void init(String analyzer) {
- Reader reader = new StringReader(makeSpec(analyzer));
+ public void init(String analyzer, String parser) {
+ Reader reader = new StringReader(makeSpec(analyzer, parser));
Model specModel = ModelFactory.createDefaultModel();
specModel.read(reader, "", "TURTLE");
TextAssembler.init();
@@ -99,6 +100,9 @@ public class TestDatasetWithKeywordAnalyzer extends AbstractTestDatasetWithTextI
dataset = (Dataset) Assembler.general.open(root);
}
+ public void init(String analyzer) {
+ init(analyzer, "text:QueryParser");
+ }
@Before
public void before() {