You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2013/06/23 18:24:49 UTC
svn commit: r1495836 -
/jena/trunk/jena-text/src/main/java/jena/textindexdump.java
Author: andy
Date: Sun Jun 23 16:24:49 2013
New Revision: 1495836
URL: http://svn.apache.org/r1495836
Log:
Development tool - dump the index, given an assembler file.
Added:
jena/trunk/jena-text/src/main/java/jena/textindexdump.java
Added: jena/trunk/jena-text/src/main/java/jena/textindexdump.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-text/src/main/java/jena/textindexdump.java?rev=1495836&view=auto
==============================================================================
--- jena/trunk/jena-text/src/main/java/jena/textindexdump.java (added)
+++ jena/trunk/jena-text/src/main/java/jena/textindexdump.java Sun Jun 23 16:24:49 2013
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package jena ;
+
+import org.apache.jena.query.text.* ;
+import org.apache.jena.query.text.assembler.TextVocab ;
+import org.apache.lucene.analysis.Analyzer ;
+import org.apache.lucene.document.Document ;
+import org.apache.lucene.index.DirectoryReader ;
+import org.apache.lucene.index.IndexReader ;
+import org.apache.lucene.index.IndexableField ;
+import org.apache.lucene.queryparser.classic.QueryParser ;
+import org.apache.lucene.search.IndexSearcher ;
+import org.apache.lucene.search.Query ;
+import org.apache.lucene.search.ScoreDoc ;
+import org.apache.lucene.store.Directory ;
+import org.slf4j.Logger ;
+import org.slf4j.LoggerFactory ;
+import arq.cmd.CmdException ;
+import arq.cmdline.ArgDecl ;
+import arq.cmdline.CmdARQ ;
+
+import com.hp.hpl.jena.sparql.core.assembler.AssemblerUtils ;
+import com.hp.hpl.jena.sparql.util.Utils ;
+
+/**
+ * Text index development tool - dump the index.
+ */
+public class textindexdump extends CmdARQ {
+
+ private static Logger log = LoggerFactory.getLogger(textindexdump.class) ;
+
+ public static final ArgDecl assemblerDescDecl = new ArgDecl(ArgDecl.HasValue, "desc", "dataset") ;
+ protected TextIndex textIndex = null ;
+
+ static public void main(String... argv) {
+ TextQuery.init() ;
+ new textindexdump(argv).mainRun() ;
+ }
+
+ protected textindexdump(String[] argv) {
+ super(argv) ;
+ super.add(assemblerDescDecl, "--desc=", "Assembler description file") ;
+ }
+
+ @Override
+ protected void processModulesAndArgs() {
+ super.processModulesAndArgs() ;
+ // Two forms : with and without arg.
+ // Maximises similarity with other tools.
+ String file ;
+ if ( super.contains(assemblerDescDecl) ) {
+ if ( getValues(assemblerDescDecl).size() != 1 )
+ throw new CmdException("Multiple assembler descriptions given") ;
+ if ( getPositional().size() != 0 )
+ throw new CmdException("Additional assembler descriptions given") ;
+ file = getValue(assemblerDescDecl) ;
+ } else {
+ if ( getNumPositional() != 1 )
+ throw new CmdException("Multiple assembler descriptions given") ;
+ file = getPositionalArg(0) ;
+ }
+ textIndex = (TextIndex)AssemblerUtils.build(file, TextVocab.textIndex) ;
+ }
+
+ @Override
+ protected String getSummary() {
+ return getCommandName() + " assemblerFile" ;
+ }
+
+ @Override
+ protected void exec() {
+
+ if ( textIndex instanceof TextIndexLucene )
+ dump((TextIndexLucene)textIndex) ;
+ else if ( textIndex instanceof TextIndexSolr )
+ dump((TextIndexSolr)textIndex) ;
+ else
+ System.err.println("Unsupported index type : "+Utils.className(textIndex)) ;
+ }
+
+ private static void dump(TextIndexSolr textIndex) { System.err.println("Not implemented : dump Solr index") ; }
+
+ private static void dump(TextIndexLucene textIndex) {
+ try {
+ Directory directory = textIndex.getDirectory() ;
+ Analyzer analyzer = textIndex.getAnalyzer() ;
+ IndexReader indexReader = DirectoryReader.open(directory) ;
+ IndexSearcher indexSearcher = new IndexSearcher(indexReader);
+ QueryParser queryParser = new QueryParser(TextIndexLucene.VER, textIndex.getDocDef().getPrimaryField(), analyzer);
+ Query query = queryParser.parse("*:*");
+ ScoreDoc[] sDocs = indexSearcher.search(query, 1000).scoreDocs ;
+ for ( ScoreDoc sd : sDocs ) {
+ System.out.println("Doc: "+sd.doc) ;
+ Document doc = indexSearcher.doc(sd.doc) ;
+ //System.out.println(doc) ;
+ for ( IndexableField f : doc ) {
+ //System.out.println(" "+f) ;
+ System.out.println(" "+f.name()+" = "+f.stringValue()) ;
+ }
+
+ }
+
+ } catch (Exception ex) { throw new TextIndexException(ex) ; }
+
+ }
+}