You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2017/06/28 16:57:16 UTC

[07/18] jena git commit: added more tests

added more tests

Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/94b41be7
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/94b41be7
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/94b41be7

Branch: refs/heads/master
Commit: 94b41be7553a4f955c0e41c868d94662bdd7236e
Parents: d2f0561
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Sat Apr 22 12:29:47 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Sat Apr 22 12:29:47 2017 -0500

----------------------------------------------------------------------
 .../assembler/TestGenericAnalyzerAssembler.java | 121 +++++++++++++++++--
 jena-text/testing/some-stop-words.txt           |   6 +
 2 files changed, 118 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/94b41be7/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
index e2a4893..2ddfa31 100644
--- a/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
+++ b/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestGenericAnalyzerAssembler.java
@@ -20,21 +20,22 @@ import org.apache.jena.vocabulary.RDFS;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.analysis.core.SimpleAnalyzer;
+import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.analysis.fr.FrenchAnalyzer;
+import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
 public class TestGenericAnalyzerAssembler {
-    
-//    // Suppress warnings
-//    @BeforeClass public static void beforeClass() { LogCtl.setError(EntityDefinitionAssembler.class); }
-//    @AfterClass  public static void afterClass()  { LogCtl.setInfo(EntityDefinitionAssembler.class); }
 
     private static final String TESTBASE = "http://example.org/test/";
     private static final Resource spec1;
     private static final Resource spec2;
     private static final Resource spec3;
+    private static final Resource spec4;
+    private static final Resource spec5;
+    private static final Resource spec6;
     
     @Test public void AnalyzerNullaryCtor() {
         GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
@@ -54,15 +55,38 @@ public class TestGenericAnalyzerAssembler {
         assertEquals(FrenchAnalyzer.class, analyzer.getClass());
     }
     
+    @Test public void AnalyzerCtorAnalyzerInt() {
+        GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
+        Analyzer analyzer = gaAssem.open(null, spec4, null);
+        assertEquals(ShingleAnalyzerWrapper.class, analyzer.getClass());
+    }
+    
+    @Test public void AnalyzerCtorShingle7() {
+        GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
+        Analyzer analyzer = gaAssem.open(null, spec5, null);
+        assertEquals(ShingleAnalyzerWrapper.class, analyzer.getClass());
+    }
+    
+    @Test public void AnalyzerCtorFile() {
+        GenericAnalyzerAssembler gaAssem = new GenericAnalyzerAssembler();
+        Analyzer analyzer = gaAssem.open(null, spec6, null);
+        assertEquals(StopAnalyzer.class, analyzer.getClass());
+    }
+    
     
     private static final String CLASS_SIMPLE = "org.apache.lucene.analysis.core.SimpleAnalyzer";
     private static final String CLASS_FRENCH = "org.apache.lucene.analysis.fr.FrenchAnalyzer";
+    private static final String CLASS_SHINGLE = "org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper";
+    private static final String CLASS_STOP = "org.apache.lucene.analysis.core.StopAnalyzer";
+    
+    private static final String FILE_STOPS = "testing/some-stop-words.txt";
     
-    private static final String PARAM_TYPE_BOOL = "boolean";
-    private static final String PARAM_TYPE_FILE = "file";
-    private static final String PARAM_TYPE_INT = "int";
-    private static final String PARAM_TYPE_SET = "set";
-    private static final String PARAM_TYPE_STRING = "string";
+    private static final String PARAM_TYPE_ANALYZER = GenericAnalyzerAssembler.TYPE_ANALYZER;
+    private static final String PARAM_TYPE_BOOL = GenericAnalyzerAssembler.TYPE_BOOL;
+    private static final String PARAM_TYPE_FILE = GenericAnalyzerAssembler.TYPE_FILE;
+    private static final String PARAM_TYPE_INT = GenericAnalyzerAssembler.TYPE_INT;
+    private static final String PARAM_TYPE_SET = GenericAnalyzerAssembler.TYPE_SET;
+    private static final String PARAM_TYPE_STRING = GenericAnalyzerAssembler.TYPE_STRING;
     
     static {
         TextAssembler.init();
@@ -100,6 +124,85 @@ public class TestGenericAnalyzerAssembler {
                                                   .addProperty(TextVocab.pParamValue, strs2list(model, "les le du"))
                                           }))
                      ;
+        
+        // analyzer spec w/ analyzer param and int
+                
+        spec4 = model.createResource()
+                     .addProperty(RDF.type, TextVocab.genericAnalyzer)
+                     .addProperty(TextVocab.pClass, CLASS_SHINGLE)
+                     .addProperty(TextVocab.pParams,
+                                  model.createList(
+                                          new RDFNode[] { 
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "defaultAnalyzer")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
+                                                  .addProperty(TextVocab.pParamValue, 
+                                                               model.createResource()
+                                                               .addProperty(RDF.type, TextVocab.simpleAnalyzer)
+                                                               ),
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "maxShingleSize")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addLiteral(TextVocab.pParamValue, 3)
+                                          }))
+                     ;
+        
+        // analyzer spec w/ seven params of mixed types
+                
+        spec5 = model.createResource()
+                     .addProperty(RDF.type, TextVocab.genericAnalyzer)
+                     .addProperty(TextVocab.pClass, CLASS_SHINGLE)
+                     .addProperty(TextVocab.pParams,
+                                  model.createList(
+                                          new RDFNode[] { 
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "delegate")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_ANALYZER)
+                                                  .addProperty(TextVocab.pParamValue, 
+                                                               model.createResource()
+                                                               .addProperty(RDF.type, TextVocab.simpleAnalyzer)
+                                                               ) ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "minShingleSize")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addLiteral(TextVocab.pParamValue, 2) ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "maxShingleSize")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_INT)
+                                                  .addLiteral(TextVocab.pParamValue, 4) ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "tokenSeparator")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
+                                                  .addLiteral(TextVocab.pParamValue, "|") ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "outputUnigrams")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
+                                                  .addLiteral(TextVocab.pParamValue, false) ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "outputUnigramsIfNoShingles")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_BOOL)
+                                                  .addLiteral(TextVocab.pParamValue, true) ,
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "fillerToken")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_STRING)
+                                                  .addLiteral(TextVocab.pParamValue, "foo")
+                                          }))
+                     ;
+        
+        // analyzer spec w/ one file param
+                
+        spec6 = model.createResource()
+                     .addProperty(RDF.type, TextVocab.genericAnalyzer)
+                     .addProperty(TextVocab.pClass, CLASS_STOP)
+                     .addProperty(TextVocab.pParams,
+                                  model.createList(
+                                          new RDFNode[] { 
+                                                  model.createResource()
+                                                  .addProperty(TextVocab.pParamName, "stopWords")
+                                                  .addProperty(TextVocab.pParamType, PARAM_TYPE_FILE)
+                                                  .addProperty(TextVocab.pParamValue, FILE_STOPS)
+                                          }))
+                     ;
     }
     
     private static Resource strs2list(Model model, String string) {

http://git-wip-us.apache.org/repos/asf/jena/blob/94b41be7/jena-text/testing/some-stop-words.txt
----------------------------------------------------------------------
diff --git a/jena-text/testing/some-stop-words.txt b/jena-text/testing/some-stop-words.txt
new file mode 100644
index 0000000..e648d66
--- /dev/null
+++ b/jena-text/testing/some-stop-words.txt
@@ -0,0 +1,6 @@
+foo
+bar
+baz
+flip
+flop
+mop
\ No newline at end of file