You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2017/06/28 16:57:16 UTC
[07/18] jena git commit: added more tests
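Added more tests for GenericAnalyzerAssembler: a ShingleAnalyzerWrapper spec with an analyzer param and an int param, a ShingleAnalyzerWrapper spec with seven params of mixed types, and a StopAnalyzer spec with a file param (using the new testing/some-stop-words.txt).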
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/94b41be7
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/94b41be7
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/94b41be7
Branch: refs/heads/master
Commit: 94b41be7553a4f955c0e41c868d94662bdd7236e
Parents: d2f0561
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Sat Apr 22 12:29:47 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Sat Apr 22 12:29:47 2017 -0500
----------------------------------------------------------------------
.../assembler/TestGenericAnalyzerAssembler.java | 121 +++++++++++++++++--
jena-text/testing/some-stop-words.txt | 6 +
2 files changed, 118 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/94b41be7/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
index e2a4893..2ddfa31 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
@@ -20,21 +20,22 @@ import org.apache.jena.vocabulary.RDFS;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
+import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
+import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestGenericAnalyzerAssembler {
-
-// // Suppress warnings
-// @BeforeClass public static void beforeClass() { LogCtl.setError(EntityDefinitionAssembler.class); }
-// @AfterClass public static void afterClass() { LogCtl.setInfo(EntityDefinitionAssembler.class); }
private static final String TESTBASE = "http://example.org/test/";
private static final Resource spec1;
private static final Resource spec2;
private static final Resource spec3;
+ private static final Resource spec4;
+ private static final Resource spec5;
+ private static final Resource spec6;
@Test public void AnalyzerNullaryCtor() {
GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
@@ -54,15 +55,38 @@ public class TestGenericAnalyzerAssembler {
assertEquals(FrenchAnalyzer.class, analyzer.getClass());
}
+ @Test public void AnalyzerCtorAnalyzerInt() {
+ GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
+ Analyzer analyzer = gaAssem.open(null, spec4, null);
+ assertEquals(ShingleAnalyzerWrapper.class, analyzer.getClass());
+ }
+
+ @Test public void AnalyzerCtorShingle7() {
+ GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
+ Analyzer analyzer = gaAssem.open(null, spec5, null);
+ assertEquals(ShingleAnalyzerWrapper.class, analyzer.getClass());
+ }
+
+ @Test public void AnalyzerCtorFile() {
+ GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
+ Analyzer analyzer = gaAssem.open(null, spec6, null);
+ assertEquals(StopAnalyzer.class, analyzer.getClass());
+ }
+
private static final String CLASS_SIMPLE = "org.apache.lucene.analysis.core.SimpleAnalyzer";
private static final String CLASS_FRENCH = "org.apache.lucene.analysis.fr.FrenchAnalyzer";
+ private static final String CLASS_SHINGLE = "org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper";
+ private static final String CLASS_STOP = "org.apache.lucene.analysis.core.StopAnalyzer";
+
+ private static final String FILE_STOPS = "testing/some-stop-words.txt";
- private static final String PARAM_TYPE_BOOL = "boolean";
- private static final String PARAM_TYPE_FILE = "file";
- private static final String PARAM_TYPE_INT = "int";
- private static final String PARAM_TYPE_SET = "set";
- private static final String PARAM_TYPE_STRING = "string";
+ private static final String PARAM_TYPE_ANALYZER = GenericAnalyzerAssembler.TYPE_ANALYZER;
+ private static final String PARAM_TYPE_BOOL = GenericAnalyzerAssembler.TYPE_BOOL;
+ private static final String PARAM_TYPE_FILE = GenericAnalyzerAssembler.TYPE_FILE;
+ private static final String PARAM_TYPE_INT = GenericAnalyzerAssembler.TYPE_INT;
+ private static final String PARAM_TYPE_SET = GenericAnalyzerAssembler.TYPE_SET;
+ private static final String PARAM_TYPE_STRING = GenericAnalyzerAssembler.TYPE_STRING;
static {
TextAssembler.init();
@@ -100,6 +124,85 @@ public class TestGenericAnalyzerAssembler {
.addProperty(TextVocab.pParamValue, strs2list(model, "les le du"))
}))
;
+
+ // analyzer spec w/ analyzer param and int
+
+ spec4 = model.createResource()
+ .addProperty(RDF.type, TextVocab.genericAnalyzer)
+ .addProperty(TextVocab.pClass, CLASS_SHINGLE)
+ .addProperty(TextVocab.pParams,
+ model.createList(
+ new RDFNode[] {
+ model.createResource()
+ .addProperty(TextVocab.pParamName, "defaultAnalyzer")
+ .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
+ .addProperty(TextVocab.pParamValue,
+ model.createResource()
+ .addProperty(RDF.type, TextVocab.simpleAnalyzer)
+ ),
+ model.createResource()
+ .addProperty(TextVocab.pParamName, "maxShingleSize")
+ .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+ .addLiteral(TextVocab.pParamValue, 3)
+ }))
+ ;
+
+ // analyzer spec w/ seven params of mixed types
+
+ spec5 = model.createResource()
+ .addProperty(RDF.type, TextVocab.genericAnalyzer)
+ .addProperty(TextVocab.pClass, CLASS_SHINGLE)
+ .addProperty(TextVocab.pParams,
+ model.createList(
+ new RDFNode[] {
+ model.createResource()
+ .addProperty(TextVocab.pParamName, "delegate")
+ .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
+ .addProperty(TextVocab.pParamValue,
+ model.createResource()
+ .addProperty(RDF.type, TextVocab.simpleAnalyzer)
+ ) ,
+ model.createResource()
+ .addProperty(TextVocab.pParamName, "minShingleSize")
+ .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+ .addLiteral(TextVocab.pParamValue, 2) ,
+ model.createResource()
+ .addProperty(TextVocab.pParamName, "maxShingleSize")
+ .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+ .addLiteral(TextVocab.pParamValue, 4) ,
+ model.createResource()
+ .addProperty(TextVocab.pParamName, "tokenSeparator")
+ .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
+ .addLiteral(TextVocab.pParamValue, "|") ,
+ model.createResource()
+ .addProperty(TextVocab.pParamName, "outputUnigrams")
+ .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
+ .addLiteral(TextVocab.pParamValue, false) ,
+ model.createResource()
+ .addProperty(TextVocab.pParamName, "outputUnigramsIfNoShingles")
+ .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
+ .addLiteral(TextVocab.pParamValue, true) ,
+ model.createResource()
+ .addProperty(TextVocab.pParamName, "fillerToken")
+ .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
+ .addLiteral(TextVocab.pParamValue, "foo")
+ }))
+ ;
+
+ // analyzer spec w/ one file param
+
+ spec6 = model.createResource()
+ .addProperty(RDF.type, TextVocab.genericAnalyzer)
+ .addProperty(TextVocab.pClass, CLASS_STOP)
+ .addProperty(TextVocab.pParams,
+ model.createList(
+ new RDFNode[] {
+ model.createResource()
+ .addProperty(TextVocab.pParamName, "stopWords")
+ .addProperty(TextVocab.pParamType, PARAM_TYPE_FILE)
+ .addProperty(TextVocab.pParamValue, FILE_STOPS)
+ }))
+ ;
}
private static Resource strs2list(Model model, String string) {
http://git-wip-us.apache.org/repos/asf/jena/blob/94b41be7/jena-text/testing/some-stop-words.txt
----------------------------------------------------------------------
diff --git a/jena-text/testing/some-stop-words.txt b/jena-text/testing/some-stop-words.txt
new file mode 100644
index 0000000..e648d66
--- /dev/null
+++ b/jena-text/testing/some-stop-words.txt
@@ -0,0 +1,6 @@
+foo
+bar
+baz
+flip
+flop
+mop
\ No newline at end of file