You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by eh...@apache.org on 2004/01/26 02:36:15 UTC
cvs commit: jakarta-lucene-sandbox/contributions/lucli/src/lucli LuceneMethods.java Lucli.java
ehatcher 2004/01/25 17:36:15
Modified: contributions/lucli/src/lucli LuceneMethods.java Lucli.java
Log:
lucli code cleanup and a couple of minor enhancements/fixes
Revision Changes Path
1.2 +282 -273 jakarta-lucene-sandbox/contributions/lucli/src/lucli/LuceneMethods.java
Index: LuceneMethods.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/lucli/src/lucli/LuceneMethods.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- LuceneMethods.java 11 Dec 2003 02:07:45 -0000 1.1
+++ LuceneMethods.java 26 Jan 2004 01:36:15 -0000 1.2
@@ -92,281 +92,290 @@
* Parts adapted from Lucene demo. Various methods that interact with
* Lucene and provide info about the index, search, etc.
*/
+
class LuceneMethods {
- private int numDocs;
- private String indexName; //directory of this index
- private long version; //version number of this index
- java.util.Iterator fieldIterator;
- Vector fields; //Fields as a vector
- Vector indexedFields; //Fields as a vector
- String fieldsArray[]; //Fields as an array
- Searcher searcher;
- Query query; //current query string
-
- public LuceneMethods(String index) {
- indexName = index;
- message("Lucene CLI. Using directory:" + indexName);
- }
-
-
- public void info() throws java.io.IOException {
- IndexReader indexReader = IndexReader.open(indexName);
-
-
- getFieldInfo();
- numDocs= indexReader.numDocs();
- message("Index has " + numDocs + " documents ");
- message ("All Fields:" + fields.toString());
- message ("Indexed Fields:" + indexedFields.toString());
-
- if (IndexReader.isLocked(indexName)) {
- message("Index is locked");
- }
- //IndexReader.getCurrentVersion(indexName);
- //System.out.println("Version:" + version);
-
- indexReader.close();
- }
-
-
- public void search(String queryString, boolean explain, boolean showTokens) throws java.io.IOException, org.apache.lucene.queryParser.ParseException {
- BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
- Hits hits = initSearch(queryString);
- System.out.println(hits.length() + " total matching documents");
- Query explainQuery;
- if (explain) {
- query = explainQuery(queryString);
- }
-
-
- final int HITS_PER_PAGE = 10;
- message ("--------------------------------------");
- for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
- int end = Math.min(hits.length(), start + HITS_PER_PAGE);
- for (int ii = start; ii < end; ii++) {
- Document doc = hits.doc(ii);
- message ("---------------- " + ii + " score:" + hits.score(ii) + "---------------------");
- printHit(doc);
- if (showTokens) {
- invertDocument(doc);
- }
- if (explain) {
- Explanation exp = searcher.explain(query, hits.id(ii));
- message("Explanation:" + exp.toString());
- }
- }
- message ("#################################################");
-
- if (hits.length() > end) {
- System.out.print("more (y/n) ? ");
- queryString = in.readLine();
- if (queryString.length() == 0 || queryString.charAt(0) == 'n')
- break;
- }
- }
- searcher.close();
- }
-
- private void printHit(Document doc) {
- for (int ii= 0; ii < fieldsArray.length; ii++) {
- String currField = fieldsArray[ii];
- String result = doc.get(currField);
- message(currField + ":" + result);
- }
- //another option is to just do message(doc);
- }
-
- public void optimize () throws IOException{
- //open the index writer. False: don't create a new one
- IndexWriter indexWriter = new IndexWriter(indexName, new StandardAnalyzer(), false);
- message("Starting to optimize index.");
- long start = System.currentTimeMillis();
- indexWriter.optimize();
- message("Done optimizing index. Took " + (System.currentTimeMillis() - start) + " msecs");
- indexWriter.close();
- }
-
-
- private Query explainQuery(String queryString) throws IOException, ParseException {
-
- searcher = new IndexSearcher(indexName);
- Analyzer analyzer = new StandardAnalyzer();
- getFieldInfo();
-
- BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
-
- MultiFieldQueryParser parser = new MultiFieldQueryParser(queryString, analyzer);
-
- int arraySize = indexedFields.size();
- String indexedArray[] = new String[arraySize];
- for (int ii = 0; ii < arraySize; ii++) {
- indexedArray[ii] = (String) indexedFields.get(ii);
- }
- query = parser.parse(queryString, indexedArray, analyzer);
- System.out.println("Searching for: " + query.toString());
- return (query);
-
- }
- private Hits initSearch(String queryString) throws IOException, ParseException {
-
- searcher = new IndexSearcher(indexName);
- Analyzer analyzer = new StandardAnalyzer();
- getFieldInfo();
-
- BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
-
- MultiFieldQueryParser parser = new MultiFieldQueryParser(queryString, analyzer);
-
- int arraySize = fields.size();
- fieldsArray = new String[arraySize];
- for (int ii = 0; ii < arraySize; ii++) {
- fieldsArray[ii] = (String) fields.get(ii);
- }
- query = parser.parse(queryString, fieldsArray, analyzer);
- System.out.println("Searching for: " + query.toString());
- Hits hits = searcher.search(query);
- return (hits);
-
- }
-
- public void count(String queryString) throws java.io.IOException, ParseException {
- Hits hits = initSearch(queryString);
- System.out.println(hits.length() + " total documents");
- searcher.close();
- }
-
- static public void message (String s) {
- System.out.println(s);
- }
-
- private void getFieldInfo() throws IOException {
- IndexReader indexReader = IndexReader.open(indexName);
- fields = new Vector();
- indexedFields = new Vector();
-
- //get the list of all field names
- fieldIterator = indexReader.getFieldNames().iterator();
- while (fieldIterator.hasNext()) {
- Object field = fieldIterator.next();
- if (field != null && !field.equals(""))
- fields.add(field.toString());
- }
- //
- //get the list of indexed field names
- fieldIterator = indexReader.getFieldNames(true).iterator();
- while (fieldIterator.hasNext()) {
- Object field = fieldIterator.next();
- if (field != null && !field.equals(""))
- indexedFields.add(field.toString());
- }
- indexReader.close();
- }
-
-
- // Copied from DocumentWriter
- // Tokenizes the fields of a document into Postings.
- private void invertDocument(Document doc)
- throws IOException {
-
- Hashtable tokenHash = new Hashtable();
- final int maxFieldLength = 10000;
-
- Analyzer analyzer = new StandardAnalyzer();
- Enumeration fields = doc.fields();
- while (fields.hasMoreElements()) {
- Field field = (Field) fields.nextElement();
- String fieldName = field.name();
-
-
- if (field.isIndexed()) {
- if (field.isTokenized()) { // un-tokenized field
- Reader reader; // find or make Reader
- if (field.readerValue() != null)
- reader = field.readerValue();
- else if (field.stringValue() != null)
- reader = new StringReader(field.stringValue());
- else
- throw new IllegalArgumentException
- ("field must have either String or Reader value");
-
- int position = 0;
- // Tokenize field and add to postingTable
- TokenStream stream = analyzer.tokenStream(fieldName, reader);
- try {
- for (Token t = stream.next(); t != null; t = stream.next()) {
- position += (t.getPositionIncrement() - 1);
- position++;
- String name = t.termText();
- Integer Count = (Integer)tokenHash.get(name);
- if (Count == null) { // not in there yet
- tokenHash.put(name, new Integer(1)); //first one
- } else {
- int count = Count.intValue();
- tokenHash.put(name, new Integer (count+1));
- }
- if (position > maxFieldLength) break;
- }
- } finally {
- stream.close();
- }
- }
-
- }
- }
- Entry[] sortedHash = getSortedHashtableEntries(tokenHash);
- for (int ii = 0; ii < sortedHash.length && ii < 10; ii ++) {
- Entry currentEntry = sortedHash[ii];
- message((ii + 1) + ":" + currentEntry.getKey() + " " + currentEntry.getValue());
- }
- }
-
-
- /** Provides a list of the top terms of the index.
- *
- * @param field - the name of the command or null for all of them.
- */
- public void terms(String field) throws IOException {
- TreeMap termMap = new TreeMap();
- IndexReader indexReader = IndexReader.open(indexName);
- TermEnum terms = indexReader.terms();
- while (terms.next()) {
- Term term = terms.term();
- //message(term.field() + ":" + term.text() + " freq:" + terms.docFreq());
- //if we're either not looking by field or we're matching the specific field
- if ((field == null) || field.equals(term.field()))
- termMap.put(new Integer((0 - terms.docFreq())), term.field() + ":" + term.text());
- }
-
- Iterator termIterator = termMap.keySet().iterator();
- for (int ii=0; termIterator.hasNext() && ii < 100; ii++) {
- Integer termFreq = (Integer) termIterator.next();
- String termDetails = (String) termMap.get(termFreq);
- message(termDetails + ": " + termFreq);
- }
- indexReader.close();
- }
-
- /** Sort Hashtable values
- * @param h the hashtable we're sorting
- * from http://developer.java.sun.com/developer/qow/archive/170/index.jsp
- */
-
- public static Entry[]
- getSortedHashtableEntries(Hashtable h) {
- Set set = h.entrySet();
- Entry [] entries =
- (Entry[])set.toArray(
- new Entry[set.size()]);
- Arrays.sort(entries, new Comparator() {
- public int compare(Object o1, Object o2) {
- Object v1 = ((Entry)o1).getValue();
- Object v2 = ((Entry)o2).getValue();
- return ((Comparable)v2).compareTo(v1); //descending order
- }
- });
- return entries;
- }
+ private int numDocs;
+ private String indexName; //directory of this index
+ private long version; //version number of this index
+ java.util.Iterator fieldIterator;
+ Vector fields; //Fields as a vector
+ Vector indexedFields; //Fields as a vector
+ String fieldsArray[]; //Fields as an array
+ Searcher searcher;
+ Query query; //current query string
+
+ public LuceneMethods(String index) {
+ indexName = index;
+ message("Lucene CLI. Using directory:" + indexName);
+ }
+
+
+ public void info() throws java.io.IOException {
+ IndexReader indexReader = IndexReader.open(indexName);
+
+
+ getFieldInfo();
+ numDocs = indexReader.numDocs();
+ message("Index has " + numDocs + " documents ");
+ message("All Fields:" + fields.toString());
+ message("Indexed Fields:" + indexedFields.toString());
+
+ if (IndexReader.isLocked(indexName)) {
+ message("Index is locked");
+ }
+ //IndexReader.getCurrentVersion(indexName);
+ //System.out.println("Version:" + version);
+
+ indexReader.close();
+ }
+
+
+ public void search(String queryString, boolean explain, boolean showTokens) throws java.io.IOException, org.apache.lucene.queryParser.ParseException {
+ BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
+ Hits hits = initSearch(queryString);
+ System.out.println(hits.length() + " total matching documents");
+ if (explain) {
+ query = explainQuery(queryString);
+ }
+
+
+ final int HITS_PER_PAGE = 10;
+ message("--------------------------------------");
+ for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
+ int end = Math.min(hits.length(), start + HITS_PER_PAGE);
+ for (int ii = start; ii < end; ii++) {
+ Document doc = hits.doc(ii);
+ message("---------------- " + (ii + 1) + " score:" + hits.score(ii) + "---------------------");
+ printHit(doc);
+ if (showTokens) {
+ invertDocument(doc);
+ }
+ if (explain) {
+ Explanation exp = searcher.explain(query, hits.id(ii));
+ message("Explanation:" + exp.toString());
+ }
+ }
+ message("#################################################");
+
+ if (hits.length() > end) {
+ System.out.print("more (y/n) ? ");
+ queryString = in.readLine();
+ if (queryString.length() == 0 || queryString.charAt(0) == 'n')
+ break;
+ }
+ }
+ searcher.close();
+ }
+
+ /**
+ * @todo Allow user to specify what field(s) to display
+ */
+ private void printHit(Document doc) {
+ for (int ii = 0; ii < fieldsArray.length; ii++) {
+ String currField = fieldsArray[ii];
+ String[] result = doc.getValues(currField);
+ for (int i = 0; i < result.length; i++) {
+ message(currField + ":" + result[i]);
+ }
+ }
+ //another option is to just do message(doc);
+ }
+
+ public void optimize() throws IOException {
+ //open the index writer. False: don't create a new one
+ IndexWriter indexWriter = new IndexWriter(indexName, new StandardAnalyzer(), false);
+ message("Starting to optimize index.");
+ long start = System.currentTimeMillis();
+ indexWriter.optimize();
+ message("Done optimizing index. Took " + (System.currentTimeMillis() - start) + " msecs");
+ indexWriter.close();
+ }
+
+
+ private Query explainQuery(String queryString) throws IOException, ParseException {
+
+ searcher = new IndexSearcher(indexName);
+ Analyzer analyzer = new StandardAnalyzer();
+ getFieldInfo();
+
+ BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
+
+ MultiFieldQueryParser parser = new MultiFieldQueryParser(queryString, analyzer);
+
+ int arraySize = indexedFields.size();
+ String indexedArray[] = new String[arraySize];
+ for (int ii = 0; ii < arraySize; ii++) {
+ indexedArray[ii] = (String) indexedFields.get(ii);
+ }
+ query = parser.parse(queryString, indexedArray, analyzer);
+ System.out.println("Searching for: " + query.toString());
+ return (query);
+
+ }
+
+ /**
+ * @todo Allow user to specify analyzer
+ */
+ private Hits initSearch(String queryString) throws IOException, ParseException {
+
+ searcher = new IndexSearcher(indexName);
+ Analyzer analyzer = new StandardAnalyzer();
+ getFieldInfo();
+
+ BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
+
+ MultiFieldQueryParser parser = new MultiFieldQueryParser(queryString, analyzer);
+
+ int arraySize = fields.size();
+ fieldsArray = new String[arraySize];
+ for (int ii = 0; ii < arraySize; ii++) {
+ fieldsArray[ii] = (String) fields.get(ii);
+ }
+ query = parser.parse(queryString, fieldsArray, analyzer);
+ System.out.println("Searching for: " + query.toString());
+ Hits hits = searcher.search(query);
+ return (hits);
+
+ }
+
+ public void count(String queryString) throws java.io.IOException, ParseException {
+ Hits hits = initSearch(queryString);
+ System.out.println(hits.length() + " total documents");
+ searcher.close();
+ }
+
+ static public void message(String s) {
+ System.out.println(s);
+ }
+
+ private void getFieldInfo() throws IOException {
+ IndexReader indexReader = IndexReader.open(indexName);
+ fields = new Vector();
+ indexedFields = new Vector();
+
+ //get the list of all field names
+ fieldIterator = indexReader.getFieldNames().iterator();
+ while (fieldIterator.hasNext()) {
+ Object field = fieldIterator.next();
+ if (field != null && !field.equals(""))
+ fields.add(field.toString());
+ }
+ //
+ //get the list of indexed field names
+ fieldIterator = indexReader.getFieldNames(true).iterator();
+ while (fieldIterator.hasNext()) {
+ Object field = fieldIterator.next();
+ if (field != null && !field.equals(""))
+ indexedFields.add(field.toString());
+ }
+ indexReader.close();
+ }
+
+
+ // Copied from DocumentWriter
+ // Tokenizes the fields of a document into Postings.
+ private void invertDocument(Document doc)
+ throws IOException {
+
+ Hashtable tokenHash = new Hashtable();
+ final int maxFieldLength = 10000;
+
+ Analyzer analyzer = new StandardAnalyzer();
+ Enumeration fields = doc.fields();
+ while (fields.hasMoreElements()) {
+ Field field = (Field) fields.nextElement();
+ String fieldName = field.name();
+
+
+ if (field.isIndexed()) {
+ if (field.isTokenized()) { // un-tokenized field
+ Reader reader; // find or make Reader
+ if (field.readerValue() != null)
+ reader = field.readerValue();
+ else if (field.stringValue() != null)
+ reader = new StringReader(field.stringValue());
+ else
+ throw new IllegalArgumentException
+ ("field must have either String or Reader value");
+
+ int position = 0;
+ // Tokenize field and add to postingTable
+ TokenStream stream = analyzer.tokenStream(fieldName, reader);
+ try {
+ for (Token t = stream.next(); t != null; t = stream.next()) {
+ position += (t.getPositionIncrement() - 1);
+ position++;
+ String name = t.termText();
+ Integer Count = (Integer) tokenHash.get(name);
+ if (Count == null) { // not in there yet
+ tokenHash.put(name, new Integer(1)); //first one
+ } else {
+ int count = Count.intValue();
+ tokenHash.put(name, new Integer(count + 1));
+ }
+ if (position > maxFieldLength) break;
+ }
+ } finally {
+ stream.close();
+ }
+ }
+
+ }
+ }
+ Entry[] sortedHash = getSortedHashtableEntries(tokenHash);
+ for (int ii = 0; ii < sortedHash.length && ii < 10; ii++) {
+ Entry currentEntry = sortedHash[ii];
+ message((ii + 1) + ":" + currentEntry.getKey() + " " + currentEntry.getValue());
+ }
+ }
+
+
+ /** Provides a list of the top terms of the index.
+ *
+ * @param field - the name of the field to list terms for, or null for all fields.
+ */
+ public void terms(String field) throws IOException {
+ TreeMap termMap = new TreeMap();
+ IndexReader indexReader = IndexReader.open(indexName);
+ TermEnum terms = indexReader.terms();
+ while (terms.next()) {
+ Term term = terms.term();
+ //message(term.field() + ":" + term.text() + " freq:" + terms.docFreq());
+ //if we're either not looking by field or we're matching the specific field
+ if ((field == null) || field.equals(term.field()))
+ termMap.put(term.field() + ":" + term.text(), new Integer((terms.docFreq())));
+ }
+
+ Iterator termIterator = termMap.keySet().iterator();
+ for (int ii = 0; termIterator.hasNext() && ii < 100; ii++) {
+ String termDetails = (String) termIterator.next();
+ Integer termFreq = (Integer) termMap.get(termDetails);
+ message(termDetails + ": " + termFreq);
+ }
+ indexReader.close();
+ }
+
+ /** Sort Hashtable values
+ * @param h the hashtable we're sorting
+ * from http://developer.java.sun.com/developer/qow/archive/170/index.jsp
+ */
+
+ public static Entry[]
+ getSortedHashtableEntries(Hashtable h) {
+ Set set = h.entrySet();
+ Entry[] entries =
+ (Entry[]) set.toArray(
+ new Entry[set.size()]);
+ Arrays.sort(entries, new Comparator() {
+ public int compare(Object o1, Object o2) {
+ Object v1 = ((Entry) o1).getValue();
+ Object v2 = ((Entry) o2).getValue();
+ return ((Comparable) v2).compareTo(v1); //descending order
+ }
+ });
+ return entries;
+ }
}
1.3 +2 -2 jakarta-lucene-sandbox/contributions/lucli/src/lucli/Lucli.java
Index: Lucli.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/lucli/src/lucli/Lucli.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- Lucli.java 25 Jan 2004 19:42:21 -0000 1.2
+++ Lucli.java 26 Jan 2004 01:36:15 -0000 1.3
@@ -69,7 +69,7 @@
public class Lucli {
final static String DEFAULT_INDEX = "index"; //directory "index" under the current directory
- final static String HISTORYFILE = ".lucli"; //directory "index" under the current directory
+ final static String HISTORYFILE = ".lucli"; //history file in user's home directory
public final static int MAX_TERMS = 100; //Maximum number of terms we're going to show
// List of commands
@@ -352,7 +352,7 @@
}
private void usage() {
- message("Usage: lucli [-j]");
+ message("Usage: lucli [-r]");
message("Arguments:");
message("\t-r: Provide tab completion and history using the GNU readline shared library ");
}
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org