You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by os...@apache.org on 2016/04/06 11:05:34 UTC

[1/2] jena git commit: simplify parseQuery and preParseQuery: get rid of primaryField argument as it is always the same

Repository: jena
Updated Branches:
  refs/heads/master 9c4039826 -> 0bc25005a


simplify parseQuery and preParseQuery: get rid of primaryField argument as it is always the same


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/e622fd1f
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/e622fd1f
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/e622fd1f

Branch: refs/heads/master
Commit: e622fd1fec5a164300732aa0de4a3ec2838b7c4b
Parents: 9c40398
Author: Osma Suominen <os...@apache.org>
Authored: Tue Mar 29 17:23:27 2016 +0300
Committer: Osma Suominen <os...@helsinki.fi>
Committed: Wed Apr 6 12:02:04 2016 +0300

----------------------------------------------------------------------
 .../org/apache/jena/query/text/TextIndexLucene.java     | 12 ++++++------
 .../jena/query/text/TextIndexLuceneMultilingual.java    |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/e622fd1f/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index 65dc881..38a479a 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -289,21 +289,21 @@ public class TextIndexLucene implements TextIndex {
         }
     }
 
-    private static Query parseQuery(String queryString, String primaryField, Analyzer analyzer) throws ParseException {
-        QueryParser queryParser = new QueryParser(VER, primaryField, analyzer) ;
+    private Query parseQuery(String queryString, Analyzer analyzer) throws ParseException {
+        QueryParser queryParser = new QueryParser(VER, docDef.getPrimaryField(), analyzer) ;
         queryParser.setAllowLeadingWildcard(true) ;
         Query query = queryParser.parse(queryString) ;
         return query ;
     }
     
-    protected Query preParseQuery(String queryString, String primaryField, Analyzer analyzer) throws ParseException {
-        return parseQuery(queryString, primaryField, analyzer);
+    protected Query preParseQuery(String queryString, Analyzer analyzer) throws ParseException {
+        return parseQuery(queryString, analyzer);
     }
 
     private List<Map<String, Node>> get$(IndexReader indexReader, String uri) throws ParseException, IOException {
         String escaped = QueryParserBase.escape(uri) ;
         String qs = docDef.getEntityField() + ":" + escaped ;
-        Query query = preParseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
+        Query query = preParseQuery(qs, queryAnalyzer) ;
         IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
         ScoreDoc[] sDocs = indexSearcher.search(query, 1).scoreDocs ;
         List<Map<String, Node>> records = new ArrayList<Map<String, Node>>() ;
@@ -351,7 +351,7 @@ public class TextIndexLucene implements TextIndex {
 
     private List<TextHit> query$(IndexReader indexReader, Node property, String qs, int limit) throws ParseException, IOException {
         IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
-        Query query = preParseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
+        Query query = preParseQuery(qs, queryAnalyzer) ;
         if ( limit <= 0 )
             limit = MAX_N ;
         ScoreDoc[] sDocs = indexSearcher.search(query, limit).scoreDocs ;

http://git-wip-us.apache.org/repos/asf/jena/blob/e622fd1f/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
index ce20294..ec7a8bb 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
@@ -64,12 +64,12 @@ public class TextIndexLuceneMultilingual extends TextIndexLucene {
     }
 
     @Override
-    protected Query preParseQuery(String queryString, String primaryField, Analyzer analyzer) throws ParseException {
+    protected Query preParseQuery(String queryString, Analyzer analyzer) throws ParseException {
         if (queryString.contains(getDocDef().getLangField() + ":")) {
             String lang = queryString.substring(queryString.lastIndexOf(":") + 1);
             if (!"*".equals(lang))
                 analyzer = Util.getLocalizedAnalyzer(lang);
         }
-        return super.preParseQuery(queryString, primaryField, analyzer);
+        return super.preParseQuery(queryString, analyzer);
     }
 }


[2/2] jena git commit: JENA-1134: basic support for AnalyzingQueryParser

Posted by os...@apache.org.
JENA-1134: basic support for AnalyzingQueryParser


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/0bc25005
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/0bc25005
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/0bc25005

Branch: refs/heads/master
Commit: 0bc25005a35d72f5053be0aa1602e40e09fb9bf3
Parents: e622fd1
Author: Osma Suominen <os...@apache.org>
Authored: Wed Mar 30 11:09:02 2016 +0300
Committer: Osma Suominen <os...@helsinki.fi>
Committed: Wed Apr 6 12:03:04 2016 +0300

----------------------------------------------------------------------
 .../apache/jena/query/text/TextIndexConfig.java |  9 +++
 .../apache/jena/query/text/TextIndexLucene.java | 17 +++++-
 .../assembler/TextIndexLuceneAssembler.java     | 12 ++++
 .../jena/query/text/assembler/TextVocab.java    |  1 +
 .../org/apache/jena/query/text/TS_Text.java     |  1 +
 .../TestDatasetWithAnalyzingQueryParser.java    | 64 ++++++++++++++++++++
 .../text/TestDatasetWithKeywordAnalyzer.java    | 10 ++-
 7 files changed, 110 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/0bc25005/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
index 98e82f6..932a645 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexConfig.java
@@ -25,6 +25,7 @@ public class TextIndexConfig {
     EntityDefinition entDef;
     Analyzer analyzer;
     Analyzer queryAnalyzer;
+    String queryParser;
     boolean multilingualSupport;
     boolean valueStored;
 
@@ -51,6 +52,14 @@ public class TextIndexConfig {
     public void setQueryAnalyzer(Analyzer queryAnalyzer) {
         this.queryAnalyzer = queryAnalyzer;
     }
+    
+    public String getQueryParser() {
+        return ((queryParser != null) ? queryParser : "QueryParser");
+    }
+    
+    public void setQueryParser(String queryParser) {
+        this.queryParser = queryParser;
+    }
 
     public boolean isMultilingualSupport() {
         return multilingualSupport;

http://git-wip-us.apache.org/repos/asf/jena/blob/0bc25005/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index 38a479a..f2e3786 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -34,6 +34,7 @@ import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper ;
 import org.apache.lucene.analysis.standard.StandardAnalyzer ;
 import org.apache.lucene.document.* ;
 import org.apache.lucene.index.* ;
+import org.apache.lucene.queryparser.analyzing.AnalyzingQueryParser ;
 import org.apache.lucene.queryparser.classic.ParseException ;
 import org.apache.lucene.queryparser.classic.QueryParser ;
 import org.apache.lucene.queryparser.classic.QueryParserBase ;
@@ -67,6 +68,7 @@ public class TextIndexLucene implements TextIndex {
     private final Directory        directory ;
     private final Analyzer         analyzer ;
     private final Analyzer         queryAnalyzer ;
+    private final String           queryParserType ;
     private final FieldType        ftText ;
 
     // The IndexWriter can't be final because we may have to recreate it if rollback() is called.
@@ -104,6 +106,7 @@ public class TextIndexLucene implements TextIndex {
         this.analyzer = new PerFieldAnalyzerWrapper(
                 (null != config.getAnalyzer()) ? config.getAnalyzer() : new StandardAnalyzer(VER), analyzerPerField) ;
         this.queryAnalyzer = (null != config.getQueryAnalyzer()) ? config.getQueryAnalyzer() : this.analyzer ;
+        this.queryParserType = config.getQueryParser() ;
         this.ftText = config.isValueStored() ? TextField.TYPE_STORED : TextField.TYPE_NOT_STORED ;
         if (config.isValueStored() && docDef.getLangField() == null)
             log.warn("Values stored but langField not set. Returned values will not have language tag or datatype.");
@@ -288,9 +291,21 @@ public class TextIndexLucene implements TextIndex {
             throw new TextIndexException(ex) ;
         }
     }
+    
+    private QueryParser getQueryParser(Analyzer analyzer) {
+        switch(queryParserType) {
+            case "QueryParser":
+                return new QueryParser(VER, docDef.getPrimaryField(), analyzer) ;
+            case "AnalyzingQueryParser":
+                return new AnalyzingQueryParser(VER, docDef.getPrimaryField(), analyzer) ;
+            default:
+                log.warn("Unknown query parser type '" + queryParserType + "'. Defaulting to standard QueryParser") ;
+                return new QueryParser(VER, docDef.getPrimaryField(), analyzer) ;
+        }
+    }
 
     private Query parseQuery(String queryString, Analyzer analyzer) throws ParseException {
-        QueryParser queryParser = new QueryParser(VER, docDef.getPrimaryField(), analyzer) ;
+        QueryParser queryParser = getQueryParser(analyzer) ;
         queryParser.setAllowLeadingWildcard(true) ;
         Query query = queryParser.parse(queryString) ;
         return query ;

http://git-wip-us.apache.org/repos/asf/jena/blob/0bc25005/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
index 98ffec7..1af7e9d 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java
@@ -94,6 +94,17 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
                 Resource analyzerResource = (Resource) qaNode;
                 queryAnalyzer = (Analyzer) a.open(analyzerResource);
             }
+            
+            String queryParser = null;
+            Statement queryParserStatement = root.getProperty(pQueryParser);
+            if (null != queryParserStatement) {
+                RDFNode qpNode = queryParserStatement.getObject();
+                if (! qpNode.isResource()) {
+                    throw new TextIndexException("Text query parser property is not a resource : " + qpNode);
+                }
+                Resource parserResource = (Resource) qpNode;
+                queryParser = parserResource.getLocalName();
+            }
 
             boolean isMultilingualSupport = false;
             Statement mlSupportStatement = root.getProperty(pMultilingualSupport);
@@ -120,6 +131,7 @@ public class TextIndexLuceneAssembler extends AssemblerBase {
             TextIndexConfig config = new TextIndexConfig(docDef);
             config.setAnalyzer(analyzer);
             config.setQueryAnalyzer(queryAnalyzer);
+            config.setQueryParser(queryParser);
             config.setMultilingualSupport(isMultilingualSupport);
             config.setValueStored(storeValues);
 

http://git-wip-us.apache.org/repos/asf/jena/blob/0bc25005/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
index 705b565..d60aafb 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
@@ -42,6 +42,7 @@ public class TextVocab
     public static final Property pMultilingualSupport   = Vocab.property(NS, "multilingualSupport") ;
     public static final Property pStoreValues       = Vocab.property(NS, "storeValues") ;
     public static final Property pQueryAnalyzer     = Vocab.property(NS, "queryAnalyzer") ;
+    public static final Property pQueryParser       = Vocab.property(NS, "queryParser") ;
     public static final Property pEntityMap         = Vocab.property(NS, "entityMap") ;
     public static final Property pTokenizer         = Vocab.property(NS, "tokenizer") ;
     public static final Property pFilters           = Vocab.property(NS, "filters") ;

http://git-wip-us.apache.org/repos/asf/jena/blob/0bc25005/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
index 6e0be2c..2ee1dd3 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
@@ -51,6 +51,7 @@ import org.junit.runners.Suite.SuiteClasses ;
     , TestLuceneWithMultipleThreads.class
     , TestDatasetWithLocalizedAnalyzer.class
     , TestDatasetWithConfigurableAnalyzer.class
+    , TestDatasetWithAnalyzingQueryParser.class
 })
 
 public class TS_Text

http://git-wip-us.apache.org/repos/asf/jena/blob/0bc25005/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithAnalyzingQueryParser.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithAnalyzingQueryParser.java b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithAnalyzingQueryParser.java
new file mode 100644
index 0000000..1b77311
--- /dev/null
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithAnalyzingQueryParser.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import java.util.Set ;
+
+import org.apache.jena.atlas.lib.StrUtils ;
+import org.apache.jena.ext.com.google.common.collect.Sets ;
+import org.junit.Before ;
+import org.junit.Test ;
+
+/**
+ * This class defines a setup configuration for a dataset that uses an ASCII folding lowercase keyword analyzer together with the AnalyzingQueryParser on a Lucene index.
+ */
+public class TestDatasetWithAnalyzingQueryParser extends TestDatasetWithConfigurableAnalyzer {
+    @Override
+    @Before
+    public void before() {
+        init(StrUtils.strjoinNL(
+            "text:ConfigurableAnalyzer ;",
+            "text:tokenizer text:KeywordTokenizer ;",
+            "text:filters (text:ASCIIFoldingFilter text:LowerCaseFilter)"
+        ), "text:AnalyzingQueryParser");
+    }    
+    
+    @Test
+    public void testAnalyzingQueryParserAnalyzesWildcards() {
+        final String testName = "testAnalyzingQueryParserAnalyzesWildcards";
+        final String turtle = StrUtils.strjoinNL(
+                TURTLE_PROLOG,
+                "<" + RESOURCE_BASE + testName + ">",
+                "  rdfs:label 'éducation'@fr",
+                ".",
+                "<" + RESOURCE_BASE + "irrelevant>",
+                "  rdfs:label 'déjà vu'@fr",
+                "."
+                );
+        String queryString = StrUtils.strjoinNL(
+                QUERY_PROLOG,
+                "SELECT ?s",
+                "WHERE {",
+                "    ?s text:query ( rdfs:label 'édu*' 10 ) .",
+                "}"
+                );
+        Set<String> expectedURIs = Sets.newHashSet(RESOURCE_BASE + testName);
+        doTestSearch(turtle, queryString, expectedURIs);
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/0bc25005/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java
index 2ff039d..375c7e0 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java
@@ -44,7 +44,7 @@ public class TestDatasetWithKeywordAnalyzer extends AbstractTestDatasetWithTextI
     private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";
     private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;
 
-    private static String makeSpec(String analyzer) {
+    private static String makeSpec(String analyzer, String parser) {
         return StrUtils.strjoinNL(
                     "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
                     "prefix ja:   <http://jena.hpl.hp.com/2005/11/Assembler#> ",
@@ -73,6 +73,7 @@ public class TestDatasetWithKeywordAnalyzer extends AbstractTestDatasetWithTextI
                     ":indexLucene",
                     "    a text:TextIndexLucene ;",
                     "    text:directory \"mem\" ;",
+                    "    text:queryParser " + parser + ";",
                     "    text:entityMap :entMap ;",
                     "    .",
                     "",
@@ -90,8 +91,8 @@ public class TestDatasetWithKeywordAnalyzer extends AbstractTestDatasetWithTextI
                     );
     }      
     
-    public void init(String analyzer) {
-        Reader reader = new StringReader(makeSpec(analyzer));
+    public void init(String analyzer, String parser) {
+        Reader reader = new StringReader(makeSpec(analyzer, parser));
         Model specModel = ModelFactory.createDefaultModel();
         specModel.read(reader, "", "TURTLE");
         TextAssembler.init();            
@@ -99,6 +100,9 @@ public class TestDatasetWithKeywordAnalyzer extends AbstractTestDatasetWithTextI
         dataset = (Dataset) Assembler.general.open(root);
     }
     
+    public void init(String analyzer) {
+        init(analyzer, "text:QueryParser");
+    }   
     
     @Before
     public void before() {