You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2014/09/04 10:38:02 UTC
svn commit: r1622417 - in /jena/trunk/jena-text/src:
main/java/org/apache/jena/query/text/analyzer/
main/java/org/apache/jena/query/text/assembler/
test/java/org/apache/jena/query/text/
test/java/org/apache/jena/query/text/assembler/
Author: andy
Date: Thu Sep 4 08:38:01 2014
New Revision: 1622417
URL: http://svn.apache.org/r1622417
Log:
JENA-776 : LowerCaseKeywordAnalyzer
Added:
jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/analyzer/
jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java (with props)
jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/LowerCaseKeywordAnalyzerAssembler.java (with props)
jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLowerCaseKeywordAnalyzer.java (with props)
Modified:
jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java
jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java
Added: jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java?rev=1622417&view=auto
==============================================================================
--- jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java (added)
+++ jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java Thu Sep 4 08:38:01 2014
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.analyzer ;
+
+import java.io.Reader ;
+
+import org.apache.lucene.analysis.Analyzer ;
+import org.apache.lucene.analysis.core.KeywordTokenizer ;
+import org.apache.lucene.analysis.core.LowerCaseFilter ;
+import org.apache.lucene.util.Version ;
+
+
+/**
+ * Lucene Analyzer implementation that works like KeywordAnalyzer (i.e.
+ * doesn't tokenize the input, keeps it as a single token), but forces text
+ * to lowercase and is thus case-insensitive.
+ */
+
+public class LowerCaseKeywordAnalyzer extends Analyzer {
+    /** Lucene version handed to LowerCaseFilter; final because it is never reassigned. */
+    private final Version version;
+    /** @param ver Lucene version to pass to the LowerCaseFilter built for each field */
+    public LowerCaseKeywordAnalyzer(Version ver) {
+        this.version = ver;
+    }
+
+    /** Wraps a KeywordTokenizer over {@code reader} in a LowerCaseFilter; fieldName is unused. */
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        KeywordTokenizer source = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(source, new LowerCaseFilter(version, source));
+    }
+}
Propchange: jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/analyzer/LowerCaseKeywordAnalyzer.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/LowerCaseKeywordAnalyzerAssembler.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/LowerCaseKeywordAnalyzerAssembler.java?rev=1622417&view=auto
==============================================================================
--- jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/LowerCaseKeywordAnalyzerAssembler.java (added)
+++ jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/LowerCaseKeywordAnalyzerAssembler.java Thu Sep 4 08:38:01 2014
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text.assembler ;
+
+import org.apache.jena.query.text.TextIndexLucene;
+import org.apache.jena.query.text.analyzer.LowerCaseKeywordAnalyzer;
+import org.apache.lucene.analysis.Analyzer;
+
+import com.hp.hpl.jena.assembler.Assembler;
+import com.hp.hpl.jena.assembler.Mode;
+import com.hp.hpl.jena.assembler.assemblers.AssemblerBase;
+import com.hp.hpl.jena.rdf.model.Resource;
+
+/**
+ * Assembler to create lowercase keyword analyzers.
+ */
+public class LowerCaseKeywordAnalyzerAssembler extends AssemblerBase {
+ /* Example assembler configuration (the analyzer type is a text: vocabulary term):
+ text:map (
+ [ text:field "text" ;
+ text:predicate rdfs:label;
+ text:analyzer [
+ a text:LowerCaseKeywordAnalyzer ; ]
+ ] )
+ .
+ */
+ // Ignores a, root and mode; always builds the analyzer with TextIndexLucene.VER.
+ @Override
+ public Analyzer open(Assembler a, Resource root, Mode mode) {
+ return new LowerCaseKeywordAnalyzer(TextIndexLucene.VER);
+ }
+}
Propchange: jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/LowerCaseKeywordAnalyzerAssembler.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java?rev=1622417&r1=1622416&r2=1622417&view=diff
==============================================================================
--- jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java (original)
+++ jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java Thu Sep 4 08:38:01 2014
@@ -33,6 +33,7 @@ public class TextAssembler
Assembler.general.implementWith(TextVocab.standardAnalyzer, new StandardAnalyzerAssembler()) ;
Assembler.general.implementWith(TextVocab.simpleAnalyzer, new SimpleAnalyzerAssembler()) ;
Assembler.general.implementWith(TextVocab.keywordAnalyzer, new KeywordAnalyzerAssembler()) ;
+ Assembler.general.implementWith(TextVocab.lowerCaseKeywordAnalyzer, new LowerCaseKeywordAnalyzerAssembler()) ;
}
}
Modified: jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java?rev=1622417&r1=1622416&r2=1622417&view=diff
==============================================================================
--- jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java (original)
+++ jena/trunk/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java Thu Sep 4 08:38:01 2014
@@ -54,6 +54,7 @@ public class TextVocab
public static final Property pStopWords = Vocab.property(NS, "stopWords");
public static final Resource simpleAnalyzer = Vocab.resource(NS, "SimpleAnalyzer");
public static final Resource keywordAnalyzer = Vocab.resource(NS, "KeywordAnalyzer");
+ public static final Resource lowerCaseKeywordAnalyzer = Vocab.resource(NS, "LowerCaseKeywordAnalyzer");
}
Modified: jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java?rev=1622417&r1=1622416&r2=1622417&view=diff
==============================================================================
--- jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java (original)
+++ jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java Thu Sep 4 08:38:01 2014
@@ -44,6 +44,7 @@ import org.junit.runners.Suite.SuiteClas
, TestDatasetWithSimpleAnalyzer.class
, TestDatasetWithStandardAnalyzer.class
, TestDatasetWithKeywordAnalyzer.class
+ , TestDatasetWithLowerCaseKeywordAnalyzer.class
})
public class TS_Text
Modified: jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java?rev=1622417&r1=1622416&r2=1622417&view=diff
==============================================================================
--- jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java (original)
+++ jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithKeywordAnalyzer.java Thu Sep 4 08:38:01 2014
@@ -47,9 +47,9 @@ public class TestDatasetWithKeywordAnaly
private static final String SPEC_BASE = "http://example.org/spec#";
private static final String SPEC_ROOT_LOCAL = "lucene_text_dataset";
private static final String SPEC_ROOT_URI = SPEC_BASE + SPEC_ROOT_LOCAL;
- private static final String SPEC;
- static {
- SPEC = StrUtils.strjoinNL(
+
+ private static String makeSpec(String analyzer) {
+ return StrUtils.strjoinNL(
"prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ",
"prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> ",
"prefix tdb: <http://jena.hpl.hp.com/2008/tdb#>",
@@ -87,15 +87,15 @@ public class TestDatasetWithKeywordAnaly
" text:map (",
" [ text:field \"label\" ; ",
" text:predicate rdfs:label ;",
- " text:analyzer [ a text:KeywordAnalyzer ]",
+ " text:analyzer [ a " + analyzer + " ]",
" ]",
" [ text:field \"comment\" ; text:predicate rdfs:comment ]",
" ) ."
);
}
- public static void init() {
- Reader reader = new StringReader(SPEC);
+ public static void init(String analyzer) {
+ Reader reader = new StringReader(makeSpec(analyzer));
Model specModel = ModelFactory.createDefaultModel();
specModel.read(reader, "", "TURTLE");
TextAssembler.init();
@@ -111,7 +111,7 @@ public class TestDatasetWithKeywordAnaly
}
@BeforeClass public static void beforeClass() {
- init();
+ init("text:KeywordAnalyzer");
}
@AfterClass public static void afterClass() {
Added: jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLowerCaseKeywordAnalyzer.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLowerCaseKeywordAnalyzer.java?rev=1622417&view=auto
==============================================================================
--- jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLowerCaseKeywordAnalyzer.java (added)
+++ jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLowerCaseKeywordAnalyzer.java Thu Sep 4 08:38:01 2014
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.jena.atlas.lib.StrUtils;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * This class defines a setup configuration for a dataset that uses a lowercase keyword analyzer with a Lucene index.
+ */
+public class TestDatasetWithLowerCaseKeywordAnalyzer extends TestDatasetWithKeywordAnalyzer {
+    /** Runs the inherited init(...) with the lower-case keyword analyzer type. */
+    @BeforeClass public static void beforeClass() {
+        init("text:LowerCaseKeywordAnalyzer");
+    }
+    /** A lower-case prefix query must find a label that was loaded in mixed case. */
+    @Test public void testLowerCaseKeywordAnalyzerIsCaseInsensitive() {
+        final String name = "testLowerCaseKeywordAnalyzerIsCaseInsensitive";
+        final String resourceUri = RESOURCE_BASE + name;
+        final String data = StrUtils.strjoinNL(
+            TURTLE_PROLOG,
+            "<" + resourceUri + ">",
+            " rdfs:label 'F;riM at&/ped9'",
+            "."
+        );
+        final String query = StrUtils.strjoinNL(
+            QUERY_PROLOG,
+            "SELECT ?s",
+            "WHERE {",
+            " ?s text:query ( rdfs:label 'f;ri*' 10 ) .",
+            "}"
+        );
+        Set<String> expected = new HashSet<>(Arrays.asList(resourceUri));
+        doTestSearch(data, query, expected);
+    }
+}
Propchange: jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLowerCaseKeywordAnalyzer.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java?rev=1622417&r1=1622416&r2=1622417&view=diff
==============================================================================
--- jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java (original)
+++ jena/trunk/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java Thu Sep 4 08:38:01 2014
@@ -27,6 +27,7 @@ import org.apache.jena.atlas.lib.Interna
import org.apache.jena.atlas.logging.LogCtl ;
import org.apache.jena.query.text.EntityDefinition ;
import org.apache.jena.query.text.TextIndexException ;
+import org.apache.jena.query.text.analyzer.LowerCaseKeywordAnalyzer ;
import org.apache.lucene.analysis.core.KeywordAnalyzer ;
import org.apache.lucene.analysis.core.SimpleAnalyzer ;
import org.apache.lucene.analysis.standard.StandardAnalyzer ;
@@ -55,6 +56,7 @@ public class TestEntityMapAssembler {
private static final Resource spec3;
private static final Resource spec4;
private static final Resource spec5;
+ private static final Resource spec6;
private static final Resource specNoEntityField;
private static final Resource specNoDefaultField;
private static final Resource specNoMapProperty;
@@ -112,6 +114,12 @@ public class TestEntityMapAssembler {
assertEquals(KeywordAnalyzer.class, entityDef.getAnalyzer(SPEC1_DEFAULT_FIELD).getClass());
}
+ @Test public void EntityHasMapEntryWithLowerCaseKeywordAnalyzer() {
+     // spec6 gives SPEC1_DEFAULT_FIELD an analyzer typed TextVocab.lowerCaseKeywordAnalyzer
+     EntityDefinition definition = new EntityDefinitionAssembler().open(Assembler.general, spec6, null);
+     assertEquals(LowerCaseKeywordAnalyzer.class, definition.getAnalyzer(SPEC1_DEFAULT_FIELD).getClass());
+ }
+
@Test(expected=TextIndexException.class) public void errorOnNoEntityField() {
EntityDefinitionAssembler entDefAssem = new EntityDefinitionAssembler();
entDefAssem.open(null, specNoEntityField, null);
@@ -231,6 +239,22 @@ public class TestEntityMapAssembler {
.addProperty(RDF.type, TextVocab.keywordAnalyzer))
}));
+ // create a simple entity map specification using a lowercase keyword analyzer
+
+ spec6 = model.createResource(TESTBASE + "spec6")
+ .addProperty(TextVocab.pEntityField, SPEC1_ENTITY_FIELD)
+ .addProperty(TextVocab.pDefaultField, SPEC1_DEFAULT_FIELD)
+ .addProperty(TextVocab.pMap,
+ model.createList(
+ new RDFNode[] {
+ model.createResource()
+ .addProperty(TextVocab.pField, SPEC1_DEFAULT_FIELD)
+ .addProperty(TextVocab.pPredicate, SPEC1_PREDICATE)
+ .addProperty(TextVocab.pAnalyzer,
+ model.createResource()
+ .addProperty(RDF.type, TextVocab.lowerCaseKeywordAnalyzer))
+ }));
+
// bad assembler spec
specNoEntityField =