You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2017/06/28 16:57:11 UTC
[02/18] jena git commit: initial documentation
initial documentation
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/8b3757ba
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/8b3757ba
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/8b3757ba
Branch: refs/heads/master
Commit: 8b3757bae52d08d4b308bd0f996ff452c60cc7c9
Parents: 1440e81
Author: Chris Tomlinson <ct...@moonvine.org>
Authored: Wed Apr 19 14:43:04 2017 -0500
Committer: Chris Tomlinson <ct...@moonvine.org>
Committed: Wed Apr 19 14:43:04 2017 -0500
----------------------------------------------------------------------
.../assembler/GenericAnalyzerAssembler.java | 85 ++++++++++++++++++++
1 file changed, 85 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/8b3757ba/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
index 5c25cb2..db707d2 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/assembler/GenericAnalyzerAssembler.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.jena.query.text.assembler;
import org.apache.jena.assembler.Assembler;
@@ -5,7 +23,74 @@ import org.apache.jena.assembler.Mode;
import org.apache.jena.assembler.assemblers.AssemblerBase;
import org.apache.jena.rdf.model.Resource;
+/**
+ * Creates generic analyzers given a fully qualified Class name and a list
+ * of parameters for a constructor of the Class.
+ * <p>
+ * The parameters may be of the following types:
+ * <pre>
+ * string String
+ * set org.apache.lucene.analysis.util.CharArraySet
+ * file java.io.FileReader
+ * int int
+ * boolean boolean
+ * </pre>
+ *
+ * Although the list of types is not exhaustive, it is a simple matter
+ * to create a wrapper Analyzer that reads a file with information that can
+ * be used to initialize any sort of parameters that may be needed for
+ * a given Analyzer. The provided types cover the vast majority of cases.
+ * <p>
+ * For example, <code>org.apache.lucene.analysis.ja.JapaneseAnalyzer</code>
+ * has a constructor with 4 parameters: a <code>UserDict</code>,
+ * a <code>CharArraySet</code>, a <code>JapaneseTokenizer.Mode</code>, and a
+ * <code>Set&lt;String&gt;</code>. So a simple wrapper can extract the values
+ * needed for the various parameters with types not available in this
+ * extension, construct the required instances, and instantiate the
+ * <code>JapaneseAnalyzer</code>.
+ * <p>
+ * Adding custom Analyzers such as the above wrapper analyzer is a simple
+ * matter of adding the Analyzer class and any associated filters and tokenizer
+ * and so on to the classpath for Jena - usually in a jar. Of course, all of
+ * the Analyzers that are included in the Lucene distribution bundled with Jena
+ * are available as generic Analyzers as well.
+ * <p>
+ * Each parameter object is specified with:
+ * <ul>
+ * <li>an optional <code>text:paramName</code> that may be used to document which
+ * parameter is represented</li>
+ * <li>a <code>text:paramType</code> which is one of: <code>string</code>,
+ * <code>set</code>, <code>file</code>, <code>int</code>, <code>boolean</code>.</li>
+ * <li>a <code>text:paramValue</code> which is an <code>xsd:string</code>, <code>xsd:boolean</code> or <code>xsd:int</code>.</li>
+ * </ul>
+ * <p>
+ * A parameter of type <code>set</code> <i>may have</i> zero or more <code>text:paramValue</code>s.
+ * <p>
+ * A parameter of type <code>string</code>, <code>file</code>, <code>boolean</code>, or
+ * <code>int</code> <i>must have</i> a single <code>text:paramValue</code>.
+ */
public class GenericAnalyzerAssembler extends AssemblerBase {
+ /*
+ text:map (
+ [ text:field "text" ;
+ text:predicate rdfs:label;
+ text:analyzer [
+ a text:GenericAnalyzer ;
+ text:class "org.apache.lucene.analysis.en.EnglishAnalyzer" ;
+ text:params [
+ a rdf:Seq ;
+ rdf:_1 [
+ text:paramName "stopwords" ;
+ text:paramType "set" ;
+ text:paramValue "the", "a", "an" ] ;
+ rdf:_2 [
+ text:paramName "stemExclusionSet" ;
+ text:paramType "set" ;
+ text:paramValue "ing", "ed" ]
+ ]
+ ]
+ ] ) .
+ */
public GenericAnalyzerAssembler() {
// TODO Auto-generated constructor stub