You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2015/05/25 14:10:53 UTC
[2/6] jena git commit: Removal of multilingual aspects from
TextIndexLucene + use of ISO 639-1 language codes only
Removal of multilingual aspects from TextIndexLucene
+ use of ISO 639-1 language codes only
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/7ab59ed6
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/7ab59ed6
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/7ab59ed6
Branch: refs/heads/master
Commit: 7ab59ed6d914496c9a1492376745fe9cee840f67
Parents: 9553c6b
Author: Alexis Miara <al...@hotmail.com>
Authored: Thu May 14 09:17:40 2015 -0400
Committer: Alexis Miara <al...@hotmail.com>
Committed: Thu May 14 09:17:40 2015 -0400
----------------------------------------------------------------------
.../org/apache/jena/query/text/LuceneUtil.java | 55 ---------------
.../org/apache/jena/query/text/TextIndex.java | 3 -
.../apache/jena/query/text/TextIndexLucene.java | 74 ++++++++------------
.../query/text/TextIndexLuceneMultilingual.java | 47 ++++++++++++-
.../apache/jena/query/text/TextIndexSolr.java | 5 --
.../org/apache/jena/query/text/TextQueryPF.java | 8 +--
6 files changed, 78 insertions(+), 114 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java b/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
index 7fafc4c..050b6f3 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/LuceneUtil.java
@@ -37,7 +37,6 @@ public class LuceneUtil {
}
public static Analyzer getLocalizedAnalyzer(String lang, Version ver) {
- lang = getISO2Language(lang);
if (lang == null)
return null;
@@ -58,60 +57,6 @@ public class LuceneUtil {
}
}
- public static String getISO2Language(String lang) {
- if (lang != null) {
- lang = lang.split("-")[0].toLowerCase();
- if (lang.length() == 2)
- return lang;
- else {
- if ("ara".equals(lang)) return "ar";
- if ("bul".equals(lang)) return "bg";
- if ("cat".equals(lang)) return "ca";
- if ("ces".equals(lang)) return "cs";
- if ("cze".equals(lang)) return "cs";
- if ("dan".equals(lang)) return "da";
- if ("deu".equals(lang)) return "de";
- if ("ger".equals(lang)) return "de";
- if ("ell".equals(lang)) return "el";
- if ("gre".equals(lang)) return "el";
- if ("eng".equals(lang)) return "en";
- if ("spa".equals(lang)) return "es";
- if ("eus".equals(lang)) return "eu";
- if ("baq".equals(lang)) return "eu";
- if ("fas".equals(lang)) return "fa";
- if ("per".equals(lang)) return "fa";
- if ("fin".equals(lang)) return "fi";
- if ("fra".equals(lang)) return "fr";
- if ("fre".equals(lang)) return "fr";
- if ("gle".equals(lang)) return "ga";
- if ("glg".equals(lang)) return "gl";
- if ("hin".equals(lang)) return "hi";
- if ("hun".equals(lang)) return "hu";
- if ("hye".equals(lang)) return "hy";
- if ("arm".equals(lang)) return "hy";
- if ("ind".equals(lang)) return "id";
- if ("ita".equals(lang)) return "it";
- if ("jpn".equals(lang)) return "jp";
- if ("kor".equals(lang)) return "ko";
- if ("lav".equals(lang)) return "lv";
- if ("nld".equals(lang)) return "nl";
- if ("dut".equals(lang)) return "nl";
- if ("nor".equals(lang)) return "no";
- if ("por".equals(lang)) return "pt";
- if ("ron".equals(lang)) return "ro";
- if ("rum".equals(lang)) return "ro";
- if ("rus".equals(lang)) return "ru";
- if ("swe".equals(lang)) return "sv";
- if ("tha".equals(lang)) return "th";
- if ("tur".equals(lang)) return "tr";
- if ("zho".equals(lang)) return "zh";
- if ("chi".equals(lang)) return "zh";
- }
- }
-
- return null;
- }
-
private static void initAnalyzerDefs() {
analyzersClasses = new Hashtable<>();
analyzersClasses.put("ar", org.apache.lucene.analysis.ar.ArabicAnalyzer.class);
http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
index ffe92e7..69efb31 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java
@@ -51,7 +51,4 @@ public interface TextIndex extends Closeable //, Transactional
List<Node> query(String qs) ;
EntityDefinition getDocDef() ;
-
- //localization
- boolean isMultilingual() ;
}
http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
index 004c242..abb9466 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java
@@ -206,42 +206,36 @@ public class TextIndexLucene implements TextIndex {
if ( log.isDebugEnabled() )
log.debug("Update entity: " + entity) ;
try {
- Document doc = doc(entity);
- Analyzer analyzer = null;
- if (isMultilingual())
- analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
- Term term = new Term(docDef.getEntityField(), entity.getId());
-
- if (analyzer != null)
- indexWriter.updateDocument(term, doc, analyzer) ;
- else //use the default one
- indexWriter.updateDocument(term, doc);
+ updateDocument(entity);
} catch (IOException e) {
throw new TextIndexException(e) ;
}
}
+ protected void updateDocument(Entity entity) throws IOException {
+ Document doc = doc(entity);
+ Term term = new Term(docDef.getEntityField(), entity.getId());
+ indexWriter.updateDocument(term, doc);
+ }
+
@Override
public void addEntity(Entity entity) {
if ( log.isDebugEnabled() )
log.debug("Add entity: " + entity) ;
try {
- Document doc = doc(entity) ;
- Analyzer analyzer = null;
- if (isMultilingual())
- analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
-
- if (analyzer != null)
- indexWriter.addDocument(doc, analyzer) ;
- else //use the default one
- indexWriter.addDocument(doc) ;
+ addDocument(entity);
}
catch (IOException e) {
throw new TextIndexException(e) ;
}
}
- private Document doc(Entity entity) {
+ protected void addDocument(Entity entity) throws IOException {
+ Document doc = doc(entity) ;
+ indexWriter.addDocument(doc) ;
+ }
+
+ protected Document doc(Entity entity) {
Document doc = new Document() ;
Field entField = new Field(docDef.getEntityField(), entity.getId(), ftIRI) ;
doc.add(entField) ;
@@ -252,18 +246,18 @@ public class TextIndexLucene implements TextIndex {
doc.add(gField) ;
}
+ for ( Field field : buildContentFields(entity) )
+ doc.add(field);
+
+ return doc ;
+ }
+
+ protected List<Field> buildContentFields(Entity entity) {
+ List<Field> list = new ArrayList<>();
for ( Entry<String, Object> e : entity.getMap().entrySet() ) {
- Field field = new Field(e.getKey(), (String)e.getValue(), ftText) ;
- doc.add(field) ;
- if (isMultilingual()) {
- String lang = entity.getLanguage();
- if (lang == null || "".equals(lang))
- lang = "undef";
- field = new Field("lang", lang, StringField.TYPE_STORED ) ;
- doc.add(field) ;
- }
+ list.add( new Field(e.getKey(), (String) e.getValue(), ftText) );
}
- return doc ;
+ return list;
}
@Override
@@ -289,10 +283,14 @@ public class TextIndexLucene implements TextIndex {
return query ;
}
+ protected Query preParseQuery(String queryString, String primaryField, Analyzer analyzer) throws ParseException {
+ return parseQuery(queryString, primaryField, analyzer);
+ }
+
private List<Map<String, Node>> get$(IndexReader indexReader, String uri) throws ParseException, IOException {
String escaped = QueryParserBase.escape(uri) ;
String qs = docDef.getEntityField() + ":" + escaped ;
- Query query = parseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
+ Query query = preParseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
ScoreDoc[] sDocs = indexSearcher.search(query, 1).scoreDocs ;
List<Map<String, Node>> records = new ArrayList<Map<String, Node>>() ;
@@ -339,14 +337,7 @@ public class TextIndexLucene implements TextIndex {
private List<Node> query$(IndexReader indexReader, String qs, int limit) throws ParseException, IOException {
IndexSearcher indexSearcher = new IndexSearcher(indexReader) ;
- Analyzer qAnalyzer = queryAnalyzer;
- if (isMultilingual()) {//index and query analyzer must be the same
- String lang = qs.substring( qs.lastIndexOf(":") + 1);
- if (!"undef".equals(lang))
- qAnalyzer = LuceneUtil.getLocalizedAnalyzer(lang);
- }
-
- Query query = parseQuery(qs, docDef.getPrimaryField(), qAnalyzer) ;
+ Query query = preParseQuery(qs, docDef.getPrimaryField(), queryAnalyzer) ;
if ( limit <= 0 )
limit = MAX_N ;
ScoreDoc[] sDocs = indexSearcher.search(query, limit).scoreDocs ;
@@ -370,11 +361,6 @@ public class TextIndexLucene implements TextIndex {
return docDef ;
}
- @Override
- public boolean isMultilingual() {
- return false;
- }
-
private Node entryToNode(String v) {
// TEMP
return NodeFactoryExtra.createLiteralNode(v, null, null) ;
http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
index 86b34e6..cdf7876 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLuceneMultilingual.java
@@ -18,8 +18,18 @@
package org.apache.jena.query.text;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
+import java.io.IOException;
+import java.util.List;
+
public class TextIndexLuceneMultilingual extends TextIndexLucene {
public TextIndexLuceneMultilingual(Directory directory, EntityDefinition def) {
@@ -27,7 +37,40 @@ public class TextIndexLuceneMultilingual extends TextIndexLucene {
}
@Override
- public boolean isMultilingual() {
- return true;
+ protected void updateDocument(Entity entity) throws IOException {
+ Document doc = doc(entity);
+ Term term = new Term(getDocDef().getEntityField(), entity.getId());
+ Analyzer analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
+ if (analyzer == null)
+ analyzer = getAnalyzer();
+ getIndexWriter().updateDocument(term, doc, analyzer) ;
+ }
+
+ @Override
+ protected void addDocument(Entity entity) throws IOException {
+ Document doc = doc(entity) ;
+ Analyzer analyzer = LuceneUtil.getLocalizedAnalyzer(entity.getLanguage());
+ if (analyzer == null)
+ analyzer = getAnalyzer();
+ getIndexWriter().addDocument(doc, analyzer) ;
+ }
+
+ @Override
+ protected List<Field> buildContentFields(Entity entity) {
+ List<Field> list = super.buildContentFields(entity);
+ String lang = entity.getLanguage();
+ if (lang == null || "".equals(lang))
+ lang = "undef";
+ list.add( new Field("lang", lang, StringField.TYPE_STORED ) );
+ return list;
+ }
+
+ @Override
+ protected Query preParseQuery(String queryString, String primaryField, Analyzer analyzer) throws ParseException {
+ String lang = queryString.substring( queryString.lastIndexOf(":") + 1);
+ if (!"undef".equals(lang))
+ analyzer = LuceneUtil.getLocalizedAnalyzer(lang);
+
+ return super.preParseQuery(queryString, primaryField, analyzer);
}
}
http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
index 5be898c..54a3263 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java
@@ -231,11 +231,6 @@ public class TextIndexSolr implements TextIndex
return docDef ;
}
- @Override
- public boolean isMultilingual() {
- return false;
- }
-
private Node entryToNode(String v)
{
// TEMP
http://git-wip-us.apache.org/repos/asf/jena/blob/7ab59ed6/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
----------------------------------------------------------------------
diff --git a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
index cc7e4f6..4fac00b 100644
--- a/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
+++ b/jena-text/src/main/java/org/apache/jena/query/text/TextQueryPF.java
@@ -74,11 +74,9 @@ public class TextQueryPF extends PropertyFunctionBase {
if (argObject.isList()) {
//extract of extra lang arg if present and if is usable (multilingual index).
//arg is removed from the list to avoid conflict with order and args length
- if (server.isMultilingual()) {
- langArg = extractArg("lang", argObject);
- if (langArg == null)
- langArg = "undef";
- }
+ langArg = extractArg("lang", argObject);
+ if (langArg == null && server instanceof TextIndexLuceneMultilingual)
+ langArg = "undef";
List<Node> list = argObject.getArgList() ;
if (list.size() == 0)