You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jetspeed-dev@portals.apache.org by pa...@apache.org on 2003/05/07 07:38:30 UTC
cvs commit: jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene TestLuceneSearch.java LuceneSearchService.java
paulsp 2003/05/06 22:38:30
Modified: src/java/org/apache/jetspeed/services/lucene
TestLuceneSearch.java LuceneSearchService.java
Log:
Added add() method.
Note:
add() currently allows duplicates. To be fixed later.
Title and description are not yet parsed out of the document.
Revision Changes Path
1.2 +42 -3 jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene/TestLuceneSearch.java
Index: TestLuceneSearch.java
===================================================================
RCS file: /home/cvs/jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene/TestLuceneSearch.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- TestLuceneSearch.java 7 May 2003 00:17:16 -0000 1.1
+++ TestLuceneSearch.java 7 May 2003 05:38:29 -0000 1.2
@@ -54,6 +54,7 @@
package org.apache.jetspeed.services.lucene;
+import java.net.URL;
// Java imports
import java.util.HashMap;
@@ -140,6 +141,15 @@
return new TestSuite( TestLuceneSearch.class );
}
+ public void testPutWebPage() throws Exception
+ {
+ URL jetspeedHomePage = new URL("http://jakarta.apache.org/jetspeed");
+ assertNotNull("Created URL to Jetspeed Home Page", jetspeedHomePage);
+ assertTrue("Adding to index", LuceneSearch.add(jetspeedHomePage));
+ assertTrue("Adding to index", LuceneSearch.add(new URL("http://www.google.com")));
+ assertTrue("Adding to index", LuceneSearch.add(new URL("http://jakarta.apache.org")));
+ }
+
/**
*
* @throws Exception
@@ -147,8 +157,23 @@
public void testVerifyJetspeedSearch() throws Exception
{
SearchResult result = null;
- SearchResults results = LuceneSearch.search( "+jetspeed +overview");
- System.out.println("hits = " + results.size());
+ SearchResults results = LuceneSearch.search( "Jetspeed");
+ System.out.println("Query 'Jetspeed' hits = " + results.size());
+ for (int i = 0; i < results.size(); i++)
+ {
+ result = results.get(i);
+ System.out.println("Score = " + result.getScore());
+ System.out.println("title = " + result.getTitle());
+ System.out.println("summary = " + result.getDescription());
+ System.out.println("url = " + result.getDocumentURL());
+ }
+ }
+
+ public void testVerifyJetspeedSearch1() throws Exception
+ {
+ SearchResult result = null;
+ SearchResults results = LuceneSearch.search( "Jetspeed Lucene");
+ System.out.println("Query 'Jetspeed Lucene' hits = " + results.size());
for (int i = 0; i < results.size(); i++)
{
result = results.get(i);
@@ -159,4 +184,18 @@
}
}
+ public void testVerifyJetspeedSearch2() throws Exception
+ {
+ SearchResult result = null;
+ SearchResults results = LuceneSearch.search( "google");
+ System.out.println("Query 'goggle' hits = " + results.size());
+ for (int i = 0; i < results.size(); i++)
+ {
+ result = results.get(i);
+ System.out.println("Score = " + result.getScore());
+ System.out.println("title = " + result.getTitle());
+ System.out.println("summary = " + result.getDescription());
+ System.out.println("url = " + result.getDocumentURL());
+ }
+ }
}
1.2 +114 -17 jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene/LuceneSearchService.java
Index: LuceneSearchService.java
===================================================================
RCS file: /home/cvs/jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene/LuceneSearchService.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- LuceneSearchService.java 7 May 2003 00:17:16 -0000 1.1
+++ LuceneSearchService.java 7 May 2003 05:38:29 -0000 1.2
@@ -59,6 +59,10 @@
import java.io.IOException;
import java.net.URL;
import javax.servlet.ServletConfig;
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.HttpException;
+import org.apache.commons.httpclient.HttpMethod;
+import org.apache.commons.httpclient.methods.GetMethod;
// Jetspeed imports
@@ -75,6 +79,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
@@ -87,12 +92,19 @@
public static final String SERVICE_NAME = "LuceneSearch";
private static final String CONFIG_DIRECTORY = "directory";
+ private static final String CONFIG_CONTENT_FIELDNAME = "fieldname.content";
+ private static final String CONFIG_CONTENT_FIELDNAME_DEFAULT = "Content";
private static final String CONFIG_DESCRIPTION_FIELDNAME = "fieldname.description";
+ private static final String CONFIG_DESCRIPTION_FIELDNAME_DEFAULT = "Description";
private static final String CONFIG_TITLE_FIELDNAME = "fieldname.title";
+ private static final String CONFIG_TITLE_FIELDNAME_DEFAULT = "Title";
private static final String CONFIG_URL_FIELDNAME = "fieldname.url";
+ private static final String CONFIG_URL_FIELDNAME_DEFAULT = "URL";
+ private String contentFieldName = null;
private String descriptionFieldName = null;
+ private File rootDir = null;
private String indexRoot = null;
- private Searcher searcher = null;
+ // private Searcher searcher = null;
private String titleFieldName = null;
private String urlFieldName = null;
@@ -172,16 +184,17 @@
.getResources(LuceneSearchService.SERVICE_NAME);
// Get config properties
- descriptionFieldName = serviceConf.getString( CONFIG_DESCRIPTION_FIELDNAME);
- titleFieldName = serviceConf.getString( CONFIG_TITLE_FIELDNAME);
- urlFieldName = serviceConf.getString( CONFIG_URL_FIELDNAME);
+ contentFieldName = serviceConf.getString( CONFIG_CONTENT_FIELDNAME, CONFIG_CONTENT_FIELDNAME_DEFAULT);
+ descriptionFieldName = serviceConf.getString(CONFIG_DESCRIPTION_FIELDNAME, CONFIG_DESCRIPTION_FIELDNAME_DEFAULT);
+ titleFieldName = serviceConf.getString(CONFIG_TITLE_FIELDNAME, CONFIG_TITLE_FIELDNAME_DEFAULT);
+ urlFieldName = serviceConf.getString(CONFIG_URL_FIELDNAME, CONFIG_URL_FIELDNAME_DEFAULT);
indexRoot = serviceConf.getString( CONFIG_DIRECTORY);
//
// The following section opens or creates the search index
//
//
- File rootDir = new File(indexRoot);
+ rootDir = new File(indexRoot);
//If the rootDir does not exist, treat it as context relative
if (!rootDir.exists())
@@ -200,7 +213,9 @@
try
{
+ Searcher searcher = null;
searcher = new IndexSearcher( rootDir.getPath());
+ searcher.close();
}
catch (Exception e)
{
@@ -211,7 +226,6 @@
indexWriter.close();
indexWriter = null;
Log.info("Created Lucene Index in " + rootDir.getPath());
- searcher = new IndexSearcher(rootDir.getPath());
}
catch (Exception e1)
{
@@ -230,21 +244,23 @@
*
* @param searchString is the what is being searched for
* @return Hits, if no hits then null.
+ *
+ * @task Parse content into title and description fields
*/
public SearchResults search(String searchString)
{
+ Searcher searcher = null;
Hits hits = null;
try
{
+ searcher = new IndexSearcher(rootDir.getPath());
Analyzer analyzer = new StandardAnalyzer();
-
- Query query = QueryParser.parse(searchString, "contents", analyzer);
- System.out.println("Searching for: " + query.toString("contents"));
-
+ Query query = QueryParser.parse(searchString, this.contentFieldName, analyzer);
hits = searcher.search(query);
}
catch (Exception e)
{
+ e.printStackTrace();
Log.error(e);
}
@@ -259,22 +275,33 @@
{
doc = hits.doc(counter);
result.setScore(hits.score(counter));
- result.setDescription(doc.getField(this.descriptionFieldName).toString());
- result.setTitle(doc.getField(this.titleFieldName).toString());
- result.setDocumentURL(doc.getField(this.urlFieldName).toString());
+ //result.setDescription(doc.getField(this.descriptionFieldName).toString());
+ //result.setTitle(doc.getField(this.titleFieldName).toString());
+ result.setDocumentURL(doc.getField(this.urlFieldName).stringValue());
results.add(counter, result);
}
catch (IOException ioe)
{
Log.error(ioe);
- throw new Error("Error retrieving search results", ioe);
}
result = null;
}
+
+ if (searcher != null)
+ {
+ try
+ {
+ searcher.close();
+ }
+ catch (IOException ioe)
+ {
+ Log.error("Closing Searcher", ioe);
+ }
+ }
return results;
}
-
- /**
+
+ /**
* Add a page to be indexed.
*
* @return true is page added sucessfully added.
@@ -282,7 +309,77 @@
*/
public boolean add(URL pageToAdd)
{
- return false;
+ HttpClient client = new HttpClient();
+ client.startSession(pageToAdd);
+ GetMethod method = new GetMethod("/");
+ method.setFollowRedirects(true);
+ int statusCode = -1;
+ int attempt = 0;
+
+ // We will retry up to 3 times.
+ while (statusCode == -1 && attempt < 3)
+ {
+ try
+ {
+ // execute the method.
+ client.executeMethod(method);
+ statusCode = method.getStatusCode();
+ System.out.println("Status code = " + statusCode);
+ }
+ catch (HttpException e)
+ {
+ // We will retry
+ }
+ catch (IOException e)
+ {
+ return false;
+ }
+ }
+ // Check that we didn't run out of retries.
+ if (statusCode != -1)
+ {
+ String content = null;
+ try
+ {
+ content = method.getDataAsString();
+ }
+ catch (IOException ioe)
+ {
+ Log.error("Getting content for " + pageToAdd.toString(), ioe);
+ }
+
+ if (content != null)
+ {
+ try
+ {
+ Document doc = new Document();
+ doc.add(Field.Text(this.urlFieldName, pageToAdd.toString()));
+ doc.add(Field.Text(this.contentFieldName, content));
+ IndexWriter indexWriter = new IndexWriter( rootDir, new StandardAnalyzer(), false);
+ indexWriter.addDocument(doc);
+ System.out.println("Index Docuemnt Count = " + indexWriter.docCount());
+ indexWriter.optimize();
+ indexWriter.close();
+ Log.info("Added '" + pageToAdd.toString() + "' to index");
+ }
+ catch (Exception e)
+ {
+ e.printStackTrace();
+ Log.error("Adding document to index", e);
+ }
+ }
+ }
+ try
+ {
+ client.endSession();
+ }
+ catch (IOException ioe)
+ {
+ ioe.printStackTrace();
+ Log.error("Ending session to " + pageToAdd.toString(), ioe);
+ }
+
+ return (statusCode == 200);
}
public String[] getSearchSets()
---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org
RE: cvs commit: jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene TestLuceneSearch.java LuceneSearchService.java
Posted by Mark Orciuch <ma...@ngsltd.com>.
Paul,
>
> 1) httpclient v1.0 was already in the lib directory
>
I will probably look really stupid when you tell me where it is - I can't
see it there. Is it under a different name?
> 2) Yes, lucene v1.3 Release Candidate 1, but I am not aware of a reason
> not to use it. I have requested the jar be added to the Maven repository
>
Cool.
Best regards,
Mark Orciuch - morciuch@apache.org
Jakarta Jetspeed - Enterprise Portal in Java
http://jakarta.apache.org/jetspeed/
---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org
Re: cvs commit: jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene
TestLuceneSearch.java LuceneSearchService.java
Posted by Paul Spencer <pa...@mindspring.com>.
Mark,
1) httpclient v1.0 was already in the lib directory
2) Yes, lucene v1.3 Release Candidate 1, but I am not aware of a reason
not to use it. I have requested the jar be added to the Maven repository
Paul Spencer
Mark Orciuch wrote:
>Paul,
>
>Looks like there's another runtime dependency: commons-httpclient. Also, I'm
>curious why you used lucene-1.3-rc1 rather than the current stable release,
>1.2. Version 1.3-rc1 is not in the Maven repository.
>
>Best regards,
>
>Mark Orciuch - morciuch@apache.org
>Jakarta Jetspeed - Enterprise Portal in Java
>http://jakarta.apache.org/jetspeed/
>
>
>
>>-----Original Message-----
>>From: Paul Spencer [mailto:paulspencer@mindspring.com]
>>Sent: Wednesday, May 07, 2003 4:48 PM
>>To: Jetspeed Developers List
>>Subject: Re: cvs commit:
>>jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene
>>TestLuceneSearch.java LuceneSearchService.java
>>
>>
>>Mark,
>>Thanks for catching the missing jar.
>>
>>Paul Spencer
>>
>>Mark Orciuch wrote:
>>
>>
>>
>>>Paul,
>>>
>>>We need a lucene .jar checked in to get this to compile. Or have you been
>>>working with Maven too much lately ;-)
>>>
>>>Best regards,
>>>
>>>Mark Orciuch - morciuch@apache.org
>>>Jakarta Jetspeed - Enterprise Portal in Java
>>>http://jakarta.apache.org/jetspeed/
>>>
>>>
>>>
>>>
>>>
>>
>>
>>---------------------------------------------------------------------
>>To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
>>For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org
>>
>>
>>
>>
>>
>>
>
>
>
>---------------------------------------------------------------------
>To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
>For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org
>
>
>
>
---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org
RE: cvs commit: jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene TestLuceneSearch.java LuceneSearchService.java
Posted by Mark Orciuch <ma...@ngsltd.com>.
Paul,
Looks like there's another runtime dependency: commons-httpclient. Also, I'm
curious why you used lucene-1.3-rc1 rather than the current stable release,
1.2. Version 1.3-rc1 is not in the Maven repository.
Best regards,
Mark Orciuch - morciuch@apache.org
Jakarta Jetspeed - Enterprise Portal in Java
http://jakarta.apache.org/jetspeed/
> -----Original Message-----
> From: Paul Spencer [mailto:paulspencer@mindspring.com]
> Sent: Wednesday, May 07, 2003 4:48 PM
> To: Jetspeed Developers List
> Subject: Re: cvs commit:
> jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene
> TestLuceneSearch.java LuceneSearchService.java
>
>
> Mark,
> Thanks for catching the missing jar.
>
> Paul Spencer
>
> Mark Orciuch wrote:
>
> >Paul,
> >
> >We need a lucene .jar checked in to get this to compile. Or have you been
> >working with Maven too much lately ;-)
> >
> >Best regards,
> >
> >Mark Orciuch - morciuch@apache.org
> >Jakarta Jetspeed - Enterprise Portal in Java
> >http://jakarta.apache.org/jetspeed/
> >
> >
> >
>
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
> For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org
>
>
>
>
---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org
Re: cvs commit: jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene
TestLuceneSearch.java LuceneSearchService.java
Posted by Paul Spencer <pa...@mindspring.com>.
Mark,
Thanks for catching the missing jar.
Paul Spencer
Mark Orciuch wrote:
>Paul,
>
>We need a lucene .jar checked in to get this to compile. Or have you been
>working with Maven too much lately ;-)
>
>Best regards,
>
>Mark Orciuch - morciuch@apache.org
>Jakarta Jetspeed - Enterprise Portal in Java
>http://jakarta.apache.org/jetspeed/
>
>
>
---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org
RE: cvs commit: jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene TestLuceneSearch.java LuceneSearchService.java
Posted by Mark Orciuch <ma...@ngsltd.com>.
Paul,
We need a lucene .jar checked in to get this to compile. Or have you been
working with Maven too much lately ;-)
Best regards,
Mark Orciuch - morciuch@apache.org
Jakarta Jetspeed - Enterprise Portal in Java
http://jakarta.apache.org/jetspeed/
> -----Original Message-----
> From: paulsp@apache.org [mailto:paulsp@apache.org]
> Sent: Wednesday, May 07, 2003 12:39 AM
> To: jakarta-jetspeed-cvs@apache.org
> Subject: cvs commit:
> jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene
> TestLuceneSearch.java LuceneSearchService.java
>
>
> paulsp 2003/05/06 22:38:30
>
> Modified: src/java/org/apache/jetspeed/services/lucene
> TestLuceneSearch.java LuceneSearchService.java
> Log:
> Added add() method.
> Note:
> add() currently allows duplicates. To be fixed later.
> Title and description are not yet parsed out of the document.
>
> Revision Changes Path
> 1.2 +42 -3
> jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene/Test
> LuceneSearch.java
>
> Index: TestLuceneSearch.java
> ===================================================================
> RCS file:
> /home/cvs/jakarta-jetspeed/src/java/org/apache/jetspeed/services/l
> ucene/TestLuceneSearch.java,v
> retrieving revision 1.1
> retrieving revision 1.2
> diff -u -r1.1 -r1.2
> --- TestLuceneSearch.java 7 May 2003 00:17:16 -0000 1.1
> +++ TestLuceneSearch.java 7 May 2003 05:38:29 -0000 1.2
> @@ -54,6 +54,7 @@
>
> package org.apache.jetspeed.services.lucene;
>
> +import java.net.URL;
> // Java imports
> import java.util.HashMap;
>
> @@ -140,6 +141,15 @@
> return new TestSuite( TestLuceneSearch.class );
> }
>
> + public void testPutWebPage() throws Exception
> + {
> + URL jetspeedHomePage = new
> URL("http://jakarta.apache.org/jetspeed");
> + assertNotNull("Created URL to Jetspeed Home Page",
> jetspeedHomePage);
> + assertTrue("Adding to index",
> LuceneSearch.add(jetspeedHomePage));
> + assertTrue("Adding to index", LuceneSearch.add(new
> URL("http://www.google.com")));
> + assertTrue("Adding to index", LuceneSearch.add(new
> URL("http://jakarta.apache.org")));
> + }
> +
> /**
> *
> * @throws Exception
> @@ -147,8 +157,23 @@
> public void testVerifyJetspeedSearch() throws Exception
> {
> SearchResult result = null;
> - SearchResults results = LuceneSearch.search(
> "+jetspeed +overview");
> - System.out.println("hits = " + results.size());
> + SearchResults results = LuceneSearch.search( "Jetspeed");
> + System.out.println("Query 'Jetspeed' hits = " +
> results.size());
> + for (int i = 0; i < results.size(); i++)
> + {
> + result = results.get(i);
> + System.out.println("Score = " + result.getScore());
> + System.out.println("title = " + result.getTitle());
> + System.out.println("summary = " + result.getDescription());
> + System.out.println("url = " + result.getDocumentURL());
> + }
> + }
> +
> + public void testVerifyJetspeedSearch1() throws Exception
> + {
> + SearchResult result = null;
> + SearchResults results = LuceneSearch.search(
> "Jetspeed Lucene");
> + System.out.println("Query 'Jetspeed Lucene' hits = " +
> results.size());
> for (int i = 0; i < results.size(); i++)
> {
> result = results.get(i);
> @@ -159,4 +184,18 @@
> }
> }
>
> + public void testVerifyJetspeedSearch2() throws Exception
> + {
> + SearchResult result = null;
> + SearchResults results = LuceneSearch.search( "google");
> + System.out.println("Query 'goggle' hits = " + results.size());
> + for (int i = 0; i < results.size(); i++)
> + {
> + result = results.get(i);
> + System.out.println("Score = " + result.getScore());
> + System.out.println("title = " + result.getTitle());
> + System.out.println("summary = " + result.getDescription());
> + System.out.println("url = " + result.getDocumentURL());
> + }
> + }
> }
>
>
>
> 1.2 +114 -17
> jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene/Luce
> neSearchService.java
>
> Index: LuceneSearchService.java
> ===================================================================
> RCS file:
> /home/cvs/jakarta-jetspeed/src/java/org/apache/jetspeed/services/l
> ucene/LuceneSearchService.java,v
> retrieving revision 1.1
> retrieving revision 1.2
> diff -u -r1.1 -r1.2
> --- LuceneSearchService.java 7 May 2003 00:17:16 -0000 1.1
> +++ LuceneSearchService.java 7 May 2003 05:38:29 -0000 1.2
> @@ -59,6 +59,10 @@
> import java.io.IOException;
> import java.net.URL;
> import javax.servlet.ServletConfig;
> +import org.apache.commons.httpclient.HttpClient;
> +import org.apache.commons.httpclient.HttpException;
> +import org.apache.commons.httpclient.HttpMethod;
> +import org.apache.commons.httpclient.methods.GetMethod;
>
> // Jetspeed imports
>
> @@ -75,6 +79,7 @@
> import org.apache.lucene.analysis.Analyzer;
> import org.apache.lucene.analysis.standard.StandardAnalyzer;
> import org.apache.lucene.document.Document;
> +import org.apache.lucene.document.Field;
> import org.apache.lucene.index.IndexWriter;
> import org.apache.lucene.queryParser.QueryParser;
> import org.apache.lucene.search.IndexSearcher;
> @@ -87,12 +92,19 @@
>
> public static final String SERVICE_NAME = "LuceneSearch";
> private static final String CONFIG_DIRECTORY = "directory";
> + private static final String CONFIG_CONTENT_FIELDNAME =
> "fieldname.content";
> + private static final String
> CONFIG_CONTENT_FIELDNAME_DEFAULT = "Content";
> private static final String CONFIG_DESCRIPTION_FIELDNAME =
> "fieldname.description";
> + private static final String
> CONFIG_DESCRIPTION_FIELDNAME_DEFAULT = "Description";
> private static final String CONFIG_TITLE_FIELDNAME =
> "fieldname.title";
> + private static final String CONFIG_TITLE_FIELDNAME_DEFAULT
> = "Title";
> private static final String CONFIG_URL_FIELDNAME = "fieldname.url";
> + private static final String CONFIG_URL_FIELDNAME_DEFAULT = "URL";
> + private String contentFieldName = null;
> private String descriptionFieldName = null;
> + private File rootDir = null;
> private String indexRoot = null;
> - private Searcher searcher = null;
> + // private Searcher searcher = null;
> private String titleFieldName = null;
> private String urlFieldName = null;
>
> @@ -172,16 +184,17 @@
> .getResources(LuceneSearchService.SERVICE_NAME);
>
> // Get config properties
> - descriptionFieldName = serviceConf.getString(
> CONFIG_DESCRIPTION_FIELDNAME);
> - titleFieldName = serviceConf.getString(
> CONFIG_TITLE_FIELDNAME);
> - urlFieldName = serviceConf.getString( CONFIG_URL_FIELDNAME);
> + contentFieldName = serviceConf.getString(
> CONFIG_CONTENT_FIELDNAME, CONFIG_CONTENT_FIELDNAME_DEFAULT);
> + descriptionFieldName =
> serviceConf.getString(CONFIG_DESCRIPTION_FIELDNAME,
> CONFIG_DESCRIPTION_FIELDNAME_DEFAULT);
> + titleFieldName =
> serviceConf.getString(CONFIG_TITLE_FIELDNAME,
> CONFIG_TITLE_FIELDNAME_DEFAULT);
> + urlFieldName =
> serviceConf.getString(CONFIG_URL_FIELDNAME, CONFIG_URL_FIELDNAME_DEFAULT);
> indexRoot = serviceConf.getString( CONFIG_DIRECTORY);
>
> //
> // The following section opens or creates the search index
> //
> //
> - File rootDir = new File(indexRoot);
> + rootDir = new File(indexRoot);
>
> //If the rootDir does not exist, treat it as context relative
> if (!rootDir.exists())
> @@ -200,7 +213,9 @@
>
> try
> {
> + Searcher searcher = null;
> searcher = new IndexSearcher( rootDir.getPath());
> + searcher.close();
> }
> catch (Exception e)
> {
> @@ -211,7 +226,6 @@
> indexWriter.close();
> indexWriter = null;
> Log.info("Created Lucene Index in " +
> rootDir.getPath());
> - searcher = new IndexSearcher(rootDir.getPath());
> }
> catch (Exception e1)
> {
> @@ -230,21 +244,23 @@
> *
> * @param searchString is the what is being searched for
> * @return Hits, if no hits then null.
> + *
> + * @task Parse content into title and description fields
> */
> public SearchResults search(String searchString)
> {
> + Searcher searcher = null;
> Hits hits = null;
> try
> {
> + searcher = new IndexSearcher(rootDir.getPath());
> Analyzer analyzer = new StandardAnalyzer();
> -
> - Query query = QueryParser.parse(searchString,
> "contents", analyzer);
> - System.out.println("Searching for: " +
> query.toString("contents"));
> -
> + Query query = QueryParser.parse(searchString,
> this.contentFieldName, analyzer);
> hits = searcher.search(query);
> }
> catch (Exception e)
> {
> + e.printStackTrace();
> Log.error(e);
> }
>
> @@ -259,22 +275,33 @@
> {
> doc = hits.doc(counter);
> result.setScore(hits.score(counter));
> -
> result.setDescription(doc.getField(this.descriptionFieldName).toString());
> -
> result.setTitle(doc.getField(this.titleFieldName).toString());
> -
> result.setDocumentURL(doc.getField(this.urlFieldName).toString());
> +
> //result.setDescription(doc.getField(this.descriptionFieldName).to
> String());
> +
> //result.setTitle(doc.getField(this.titleFieldName).toString());
> +
> result.setDocumentURL(doc.getField(this.urlFieldName).stringValue());
> results.add(counter, result);
> }
> catch (IOException ioe)
> {
> Log.error(ioe);
> - throw new Error("Error retrieving search
> results", ioe);
> }
> result = null;
> }
> +
> + if (searcher != null)
> + {
> + try
> + {
> + searcher.close();
> + }
> + catch (IOException ioe)
> + {
> + Log.error("Closing Searcher", ioe);
> + }
> + }
> return results;
> }
> -
> - /**
> +
> + /**
> * Add a page to be indexed.
> *
> * @return true is page added sucessfully added.
> @@ -282,7 +309,77 @@
> */
> public boolean add(URL pageToAdd)
> {
> - return false;
> + HttpClient client = new HttpClient();
> + client.startSession(pageToAdd);
> + GetMethod method = new GetMethod("/");
> + method.setFollowRedirects(true);
> + int statusCode = -1;
> + int attempt = 0;
> +
> + // We will retry up to 3 times.
> + while (statusCode == -1 && attempt < 3)
> + {
> + try
> + {
> + // execute the method.
> + client.executeMethod(method);
> + statusCode = method.getStatusCode();
> + System.out.println("Status code = " + statusCode);
> + }
> + catch (HttpException e)
> + {
> + // We will retry
> + }
> + catch (IOException e)
> + {
> + return false;
> + }
> + }
> + // Check that we didn't run out of retries.
> + if (statusCode != -1)
> + {
> + String content = null;
> + try
> + {
> + content = method.getDataAsString();
> + }
> + catch (IOException ioe)
> + {
> + Log.error("Getting content for " +
> pageToAdd.toString(), ioe);
> + }
> +
> + if (content != null)
> + {
> + try
> + {
> + Document doc = new Document();
> + doc.add(Field.Text(this.urlFieldName,
> pageToAdd.toString()));
> + doc.add(Field.Text(this.contentFieldName,
> content));
> + IndexWriter indexWriter = new IndexWriter(
> rootDir, new StandardAnalyzer(), false);
> + indexWriter.addDocument(doc);
> + System.out.println("Index Docuemnt Count =
> " + indexWriter.docCount());
> + indexWriter.optimize();
> + indexWriter.close();
> + Log.info("Added '" + pageToAdd.toString()
> + "' to index");
> + }
> + catch (Exception e)
> + {
> + e.printStackTrace();
> + Log.error("Adding document to index", e);
> + }
> + }
> + }
> + try
> + {
> + client.endSession();
> + }
> + catch (IOException ioe)
> + {
> + ioe.printStackTrace();
> + Log.error("Ending session to " +
> pageToAdd.toString(), ioe);
> + }
> +
> + return (statusCode == 200);
> }
>
> public String[] getSearchSets()
>
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
> For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org
>
>
>
>
---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org