You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lenya.apache.org by gr...@apache.org on 2005/02/08 19:15:36 UTC
svn commit: r152682 [13/17] - in lenya/trunk/src: java/org/apache/lenya/ac/
java/org/apache/lenya/ac/cache/ java/org/apache/lenya/ac/file/
java/org/apache/lenya/ac/impl/ java/org/apache/lenya/ac/ldap/
java/org/apache/lenya/cms/ac/ java/org/apache/lenya/cms/ac/cocoon/
java/org/apache/lenya/cms/ac/usecase/ java/org/apache/lenya/cms/ac/usecases/
java/org/apache/lenya/cms/ac/workflow/ java/org/apache/lenya/cms/ant/
java/org/apache/lenya/cms/authoring/ java/org/apache/lenya/cms/cocoon/acting/
java/org/apache/lenya/cms/cocoon/bean/
java/org/apache/lenya/cms/cocoon/components/modules/input/
java/org/apache/lenya/cms/cocoon/flow/
java/org/apache/lenya/cms/cocoon/generation/
java/org/apache/lenya/cms/cocoon/matching/
java/org/apache/lenya/cms/cocoon/scheduler/
java/org/apache/lenya/cms/cocoon/source/
java/org/apache/lenya/cms/cocoon/task/
java/org/apache/lenya/cms/cocoon/transformation/
java/org/apache/lenya/cms/cocoon/uriparameterizer/
java/org/apache/lenya/cms/metadata/dublincore/
java/org/apache/lenya/cms/metadata/usecases/
java/org/apache/lenya/cms/publication/
java/org/apache/lenya/cms/publication/file/
java/org/apache/lenya/cms/publication/task/
java/org/apache/lenya/cms/publication/templating/
java/org/apache/lenya/cms/publication/util/
java/org/apache/lenya/cms/publishing/ java/org/apache/lenya/cms/rc/
java/org/apache/lenya/cms/scheduler/ java/org/apache/lenya/cms/scheduler/xml/
java/org/apache/lenya/cms/search/usecases/ java/org/apache/lenya/cms/site/
java/org/apache/lenya/cms/site/tree/
java/org/apache/lenya/cms/site/usecases/ java/org/apache/lenya/cms/task/
java/org/apache/lenya/cms/usecase/ java/org/apache/lenya/cms/workflow/
java/org/apache/lenya/lucene/ java/org/apache/lenya/lucene/html/
java/org/apache/lenya/lucene/index/ java/org/apache/lenya/lucene/parser/
java/org/apache/lenya/net/ java/org/apache/lenya/search/
java/org/apache/lenya/search/crawler/ java/org/apache/lenya/util/
java/org/apache/lenya/workflow/ java/org/apache/lenya/workflow/impl/
java/org/apache/lenya/xml/ test/org/apache/lenya/ac/file/
test/org/apache/lenya/ac/impl/ test/org/apache/lenya/cms/
test/org/apache/lenya/cms/authoring/
test/org/apache/lenya/cms/cocoon/generation/
test/org/apache/lenya/cms/publication/
test/org/apache/lenya/cms/publication/file/ test/org/apache/lenya/cms/rc/
test/org/apache/lenya/cms/site/tree/ test/org/apache/lenya/cms/task/
test/org/apache/lenya/cms/workflow/ test/org/apache/lenya/net/
Modified: lenya/trunk/src/java/org/apache/lenya/lucene/html/HtmlDocument.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/lucene/html/HtmlDocument.java?view=diff&r1=152681&r2=152682
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/lucene/html/HtmlDocument.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/lucene/html/HtmlDocument.java Tue Feb 8 10:13:39 2005
@@ -26,11 +26,13 @@
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
+import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
+import org.apache.log4j.Logger;
import org.apache.lucene.document.Field;
import org.w3c.dom.Attr;
import org.w3c.dom.Element;
@@ -50,6 +52,7 @@
* </p>
*/
public class HtmlDocument {
+ private static final Logger log = Logger.getLogger(HtmlDocument.class);
private Element rawDoc;
private String luceneTagName = null;
private String luceneClassValue = null;
@@ -66,33 +69,28 @@
tidy.setShowWarnings(false);
org.w3c.dom.Document root = tidy.parseDOM(new FileInputStream(file), null);
- rawDoc = root.getDocumentElement();
+ this.rawDoc = root.getDocumentElement();
}
/**
* Constructs an <code>HtmlDocument</code> from an {@link java.io.InputStream}.
- *
* @param is the <code>InputStream</code> containing the HTML
- * @exception IOException if I/O exception occurs
*/
- public HtmlDocument(InputStream is) throws IOException {
+ public HtmlDocument(InputStream is) {
Tidy tidy = new Tidy();
tidy.setQuiet(true);
tidy.setShowWarnings(false);
org.w3c.dom.Document root = tidy.parseDOM(is, null);
- rawDoc = root.getDocumentElement();
+ this.rawDoc = root.getDocumentElement();
}
/**
* Creates a Lucene <code>Document</code> from an {@link java.io.InputStream}.
- *
- * @param is
+ * @param is the <code>InputStream</code> containing the HTML
* @return org.apache.lucene.document.Document
- * @exception IOException
*/
- public static org.apache.lucene.document.Document getDocument(InputStream is)
- throws IOException {
+ public static org.apache.lucene.document.Document getDocument(InputStream is) {
HtmlDocument htmlDoc = new HtmlDocument(is);
org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
@@ -104,51 +102,63 @@
/**
* Creates a Lucene <code>Document</code> from a {@link java.io.File}.
- *
- * @param file
+ * @param file The file
* @return org.apache.lucene.document.Document
- * @exception IOException
+ * @exception IOException when an IO error occurs
*/
- public static org.apache.lucene.document.Document Document(File file)
+ public static org.apache.lucene.document.Document document(File file)
throws IOException {
- HtmlDocument htmlDoc = new HtmlDocument(file);
- org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
-
- luceneDoc.add(Field.Text("title", htmlDoc.getTitle()));
- luceneDoc.add(Field.Text("contents", htmlDoc.getBody()));
-
- String contents = null;
- BufferedReader br = new BufferedReader(new FileReader(file));
- StringWriter sw = new StringWriter();
- String line = br.readLine();
+ BufferedReader br = null;
+ StringWriter sw = null;
+ org.apache.lucene.document.Document luceneDoc = null;
+
+ String contents;
+
+ try {
+ HtmlDocument htmlDoc = new HtmlDocument(file);
+ luceneDoc = new org.apache.lucene.document.Document();
+
+ luceneDoc.add(Field.Text("title", htmlDoc.getTitle()));
+ luceneDoc.add(Field.Text("contents", htmlDoc.getBody()));
+
+ contents = null;
+ br = new BufferedReader(new FileReader(file));
+ sw = new StringWriter();
+ String line = br.readLine();
+
+ while (line != null) {
+ sw.write(line);
+ line = br.readLine();
+ }
+ contents = sw.toString();
+ luceneDoc.add(Field.UnIndexed("rawcontents", contents));
- while (line != null) {
- sw.write(line);
- line = br.readLine();
+ } catch (final FileNotFoundException e) {
+ log.error("File not found " +e.toString());
+ } catch (final IOException e) {
+ log.error("IO error " +e.toString());
+ } finally {
+ if (br != null)
+ br.close();
+ if (sw != null)
+ sw.close();
}
- br.close();
- contents = sw.toString();
- sw.close();
-
- luceneDoc.add(Field.UnIndexed("rawcontents", contents));
-
return luceneDoc;
}
/**
* Gets the title attribute of the <code>HtmlDocument</code> object.
- *
* @return the title value
*/
public String getTitle() {
- if (rawDoc == null) {
+ if (this.rawDoc == null) {
return null;
}
String title = "";
- NodeList nl = rawDoc.getElementsByTagName("title");
+ NodeList nl = this.rawDoc.getElementsByTagName("title");
if (nl.getLength() > 0) {
Element titleElement = ((Element) nl.item(0));
@@ -164,17 +174,16 @@
/**
* Gets the body text attribute of the <code>HtmlDocument</code> object.
- *
* @return the body text value
*/
public String getBody() {
- if (rawDoc == null) {
+ if (this.rawDoc == null) {
return null;
}
// NOTE: JTidy will insert a meta tag: <meta name="generator" content="HTML Tidy, see www.w3.org" />
// This means that getLength is always greater than 0
- NodeList metaNL = rawDoc.getElementsByTagName("meta");
+ NodeList metaNL = this.rawDoc.getElementsByTagName("meta");
for (int i = 0; i < metaNL.getLength(); i++) {
Element metaElement = (Element) metaNL.item(i);
@@ -183,25 +192,25 @@
if ((nameAttr != null) && (valueAttr != null)) {
if (nameAttr.getValue().equals("lucene-tag-name")) {
- luceneTagName = valueAttr.getValue();
+ this.luceneTagName = valueAttr.getValue();
}
if (nameAttr.getValue().equals("lucene-class-value")) {
- luceneClassValue = valueAttr.getValue();
+ this.luceneClassValue = valueAttr.getValue();
}
}
}
boolean indexByLucene = true;
- if ((luceneTagName != null) && (luceneClassValue != null)) {
+ if ((this.luceneTagName != null) && (this.luceneClassValue != null)) {
indexByLucene = false;
}
System.out.println("HtmlDocument.getBody(): Index By Lucene (Default): " + indexByLucene);
String body = "";
- NodeList nl = rawDoc.getElementsByTagName("body");
+ NodeList nl = this.rawDoc.getElementsByTagName("body");
if (nl.getLength() > 0) {
body = getBodyText(nl.item(0), indexByLucene);
@@ -212,9 +221,8 @@
/**
* Gets the bodyText attribute of the <code>HtmlDocument</code> object.
- *
* @param node a DOM Node
- * @param indexByLucene DOCUMENT ME!
+ * @param indexByLucene Whether text is indexed by default; getBody() passes false when both lucene meta tags are set
* @return The bodyText value
*/
private String getBodyText(Node node, boolean indexByLucene) {
@@ -228,14 +236,14 @@
switch (child.getNodeType()) {
case Node.ELEMENT_NODE:
- if ((luceneTagName != null) && (luceneClassValue != null)) {
- if (child.getNodeName().equals(luceneTagName)) {
+ if ((this.luceneTagName != null) && (this.luceneClassValue != null)) {
+ if (child.getNodeName().equals(this.luceneTagName)) {
Attr attribute = ((Element) child).getAttributeNode("class");
if (attribute != null) {
- if (attribute.getValue().equals(luceneClassValue)) {
- System.out.println("HtmlDocument.getBodyText(): <" + luceneTagName +
- " class=\"" + luceneClassValue + "\"> found!");
+ if (attribute.getValue().equals(this.luceneClassValue)) {
+ System.out.println("HtmlDocument.getBodyText(): <" + this.luceneTagName +
+ " class=\"" + this.luceneClassValue + "\"> found!");
index = true;
}
Modified: lenya/trunk/src/java/org/apache/lenya/lucene/html/ParseException.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/lucene/html/ParseException.java?view=diff&r1=152681&r2=152682
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/lucene/html/ParseException.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/lucene/html/ParseException.java Tue Feb 8 10:13:39 2005
@@ -62,18 +62,17 @@
* create this object. This constructor calls its super class with the empty string to force
* the "toString" method of parent class "Throwable" to print the error message in the form:
* ParseException: <result of getMessage>
- *
- * @param currentTokenVal DOCUMENT ME!
- * @param expectedTokenSequencesVal DOCUMENT ME!
- * @param tokenImageVal DOCUMENT ME!
+ * @param currentTokenVal Value of the current token
+ * @param expectedTokenSequencesVal Value of the expected token sequences
+ * @param tokenImageVal Value of the token image array
*/
public ParseException(Token currentTokenVal, int[][] expectedTokenSequencesVal,
String[] tokenImageVal) {
super("");
- specialConstructor = true;
- currentToken = currentTokenVal;
- expectedTokenSequences = expectedTokenSequencesVal;
- tokenImage = tokenImageVal;
+ this.specialConstructor = true;
+ this.currentToken = currentTokenVal;
+ this.expectedTokenSequences = expectedTokenSequencesVal;
+ this.tokenImage = tokenImageVal;
}
/**
@@ -85,17 +84,16 @@
*/
public ParseException() {
super();
- specialConstructor = false;
+ this.specialConstructor = false;
}
/**
* Creates a new ParseException object.
- *
- * @param message DOCUMENT ME!
+ * @param message The message
*/
public ParseException(String message) {
super(message);
- specialConstructor = false;
+ this.specialConstructor = false;
}
/**
@@ -105,61 +103,67 @@
* and you do not catch it (it gets thrown from the parser), then this method is called during
* the printing of the final stack trace, and hence the correct error message gets displayed.
*
- * @return DOCUMENT ME!
+ * @return The exception message
*/
public String getMessage() {
- if (!specialConstructor) {
+ if (!this.specialConstructor) {
return super.getMessage();
}
String expected = "";
+ StringBuffer buf = new StringBuffer();
int maxSize = 0;
- for (int i = 0; i < expectedTokenSequences.length; i++) {
- if (maxSize < expectedTokenSequences[i].length) {
- maxSize = expectedTokenSequences[i].length;
+ for (int i = 0; i < this.expectedTokenSequences.length; i++) {
+ if (maxSize < this.expectedTokenSequences[i].length) {
+ maxSize = this.expectedTokenSequences[i].length;
}
- for (int j = 0; j < expectedTokenSequences[i].length; j++) {
- expected += (tokenImage[expectedTokenSequences[i][j]] + " ");
+ for (int j = 0; j < this.expectedTokenSequences[i].length; j++) {
+ buf.append((this.tokenImage[this.expectedTokenSequences[i][j]] + " "));
}
- if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) {
- expected += "...";
+ if (this.expectedTokenSequences[i][this.expectedTokenSequences[i].length - 1] != 0) {
+ buf.append("...");
}
- expected += (eol + " ");
+ buf.append((this.eol + " "));
}
- String retval = "Encountered \"";
- Token tok = currentToken.next;
+ expected = buf.toString();
+
+ String retval = null;
+ StringBuffer buf2 = new StringBuffer();
+ buf2.append("Encountered \"");
+ Token tok = this.currentToken.next;
for (int i = 0; i < maxSize; i++) {
if (i != 0) {
- retval += " ";
+ buf2.append(" ");
}
if (tok.kind == 0) {
- retval += tokenImage[0];
+ buf2.append(this.tokenImage[0]);
break;
}
- retval += add_escapes(tok.image);
+ buf2.append(add_escapes(tok.image));
tok = tok.next;
}
- retval += ("\" at line " + currentToken.next.beginLine + ", column " +
- currentToken.next.beginColumn);
- retval += ("." + eol);
+ buf2.append("\" at line " + this.currentToken.next.beginLine + ", column " +
+ this.currentToken.next.beginColumn);
+ buf2.append("." + this.eol);
- if (expectedTokenSequences.length == 1) {
- retval += ("Was expecting:" + eol + " ");
+ if (this.expectedTokenSequences.length == 1) {
+ buf2.append("Was expecting:" + this.eol + " ");
} else {
- retval += ("Was expecting one of:" + eol + " ");
+ buf2.append("Was expecting one of:" + this.eol + " ");
}
- retval += expected;
+ buf2.append(expected);
+ retval = buf2.toString();
return retval;
}
@@ -167,10 +171,8 @@
/**
* Used to convert raw characters to their escaped version when these raw version cannot be
* used as part of an ASCII string literal.
- *
- * @param str DOCUMENT ME!
- *
- * @return DOCUMENT ME!
+ * @param str The string to be escaped
+ * @return The escaped string
*/
protected String add_escapes(String str) {
StringBuffer retval = new StringBuffer();
@@ -179,47 +181,38 @@
for (int i = 0; i < str.length(); i++) {
switch (str.charAt(i)) {
case 0:
-
continue;
case '\b':
retval.append("\\b");
-
continue;
case '\t':
retval.append("\\t");
-
continue;
case '\n':
retval.append("\\n");
-
continue;
case '\f':
retval.append("\\f");
-
continue;
case '\r':
retval.append("\\r");
-
continue;
case '\"':
retval.append("\\\"");
-
continue;
case '\'':
retval.append("\\\'");
-
continue;
case '\\':
retval.append("\\\\");
-
continue;
default:
Modified: lenya/trunk/src/java/org/apache/lenya/lucene/html/ParserThread.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/lucene/html/ParserThread.java?view=diff&r1=152681&r2=152682
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/lucene/html/ParserThread.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/lucene/html/ParserThread.java Tue Feb 8 10:13:39 2005
@@ -26,28 +26,28 @@
HTMLParser parser;
ParserThread(HTMLParser p) {
- parser = p;
+ this.parser = p;
}
/**
- * DOCUMENT ME!
+ * Runs the HTML parser, then closes its output pipe and wakes any threads waiting on the parser
*/
public void run() { // convert pipeOut to pipeIn
try {
try { // parse document to pipeOut
- parser.HTMLDocument();
+ this.parser.HTMLDocument();
} catch (ParseException e) {
System.out.println("Parse Aborted: " + e.getMessage());
} catch (TokenMgrError e) {
System.out.println("Parse Aborted: " + e.getMessage());
} finally {
- parser.pipeOut.close();
+ this.parser.pipeOut.close();
- synchronized (parser) {
- parser.summary.setLength(HTMLParser.SUMMARY_LENGTH);
- parser.titleComplete = true;
- parser.notifyAll();
+ synchronized (this.parser) {
+ this.parser.summary.setLength(HTMLParser.SUMMARY_LENGTH);
+ this.parser.titleComplete = true;
+ this.parser.notifyAll();
}
}
} catch (IOException e) {
Modified: lenya/trunk/src/java/org/apache/lenya/lucene/html/SimpleCharStream.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/lucene/html/SimpleCharStream.java?view=diff&r1=152681&r2=152682
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/lucene/html/SimpleCharStream.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/lucene/html/SimpleCharStream.java Tue Feb 8 10:13:39 2005
@@ -19,16 +19,24 @@
package org.apache.lenya.lucene.html;
+import java.io.Reader;
+import java.io.IOException;
/**
* An implementation of interface CharStream, where the stream is assumed to contain only ASCII
* characters (without unicode processing).
*/
public final class SimpleCharStream {
+ /**
+ * <code>staticFlag</code> Static flag
+ */
public static final boolean staticFlag = false;
int bufsize;
int available;
int tokenBegin;
+ /**
+ * <code>bufpos</code> The buffer position
+ */
public int bufpos = -1;
private int[] bufline;
private int[] bufcolumn;
@@ -43,29 +51,27 @@
/**
* Creates a new SimpleCharStream object.
- *
- * @param dstream DOCUMENT ME!
- * @param startline DOCUMENT ME!
- * @param startcolumn DOCUMENT ME!
- * @param buffersize DOCUMENT ME!
- */
- public SimpleCharStream(java.io.Reader dstream, int startline, int startcolumn, int buffersize) {
- inputStream = dstream;
- line = startline;
- column = startcolumn - 1;
-
- available = bufsize = buffersize;
- buffer = new char[buffersize];
- bufline = new int[buffersize];
- bufcolumn = new int[buffersize];
+ * @param dstream The reader
+ * @param startline The starting line
+ * @param startcolumn The starting column
+ * @param buffersize The buffer size
+ */
+ public SimpleCharStream(Reader dstream, int startline, int startcolumn, int buffersize) {
+ this.inputStream = dstream;
+ this.line = startline;
+ this.column = startcolumn - 1;
+
+ this.available = this.bufsize = buffersize;
+ this.buffer = new char[buffersize];
+ this.bufline = new int[buffersize];
+ this.bufcolumn = new int[buffersize];
}
/**
* Creates a new SimpleCharStream object.
- *
- * @param dstream DOCUMENT ME!
- * @param startline DOCUMENT ME!
- * @param startcolumn DOCUMENT ME!
+ * @param dstream The reader
+ * @param startline The starting line
+ * @param startcolumn The starting column
*/
public SimpleCharStream(java.io.Reader dstream, int startline, int startcolumn) {
this(dstream, startline, startcolumn, 4096);
@@ -73,8 +79,7 @@
/**
* Creates a new SimpleCharStream object.
- *
- * @param dstream DOCUMENT ME!
+ * @param dstream The reader
*/
public SimpleCharStream(java.io.Reader dstream) {
this(dstream, 1, 1, 4096);
@@ -82,11 +87,10 @@
/**
* Creates a new SimpleCharStream object.
- *
- * @param dstream DOCUMENT ME!
- * @param startline DOCUMENT ME!
- * @param startcolumn DOCUMENT ME!
- * @param buffersize DOCUMENT ME!
+ * @param dstream The stream
+ * @param startline The starting line
+ * @param startcolumn The starting column
+ * @param buffersize The buffer size
*/
public SimpleCharStream(java.io.InputStream dstream, int startline, int startcolumn,
int buffersize) {
@@ -95,10 +99,9 @@
/**
* Creates a new SimpleCharStream object.
- *
- * @param dstream DOCUMENT ME!
- * @param startline DOCUMENT ME!
- * @param startcolumn DOCUMENT ME!
+ * @param dstream The stream
+ * @param startline The starting line
+ * @param startcolumn The starting column
*/
public SimpleCharStream(java.io.InputStream dstream, int startline, int startcolumn) {
this(dstream, startline, startcolumn, 4096);
@@ -106,91 +109,89 @@
/**
* Creates a new SimpleCharStream object.
- *
- * @param dstream DOCUMENT ME!
+ * @param dstream The stream
*/
public SimpleCharStream(java.io.InputStream dstream) {
this(dstream, 1, 1, 4096);
}
- private final void ExpandBuff(boolean wrapAround) {
- char[] newbuffer = new char[bufsize + 2048];
- int[] newbufline = new int[bufsize + 2048];
- int[] newbufcolumn = new int[bufsize + 2048];
+ private final void rxpandBuff(boolean wrapAround) {
+ char[] newbuffer = new char[this.bufsize + 2048];
+ int[] newbufline = new int[this.bufsize + 2048];
+ int[] newbufcolumn = new int[this.bufsize + 2048];
try {
if (wrapAround) {
- System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
- System.arraycopy(buffer, 0, newbuffer, bufsize - tokenBegin, bufpos);
- buffer = newbuffer;
-
- System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
- System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos);
- bufline = newbufline;
-
- System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
- System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos);
- bufcolumn = newbufcolumn;
+ System.arraycopy(this.buffer, this.tokenBegin, newbuffer, 0, this.bufsize - this.tokenBegin);
+ System.arraycopy(this.buffer, 0, newbuffer, this.bufsize - this.tokenBegin, this.bufpos);
+ this.buffer = newbuffer;
+
+ System.arraycopy(this.bufline, this.tokenBegin, newbufline, 0, this.bufsize - this.tokenBegin);
+ System.arraycopy(this.bufline, 0, newbufline, this.bufsize - this.tokenBegin, this.bufpos);
+ this.bufline = newbufline;
+
+ System.arraycopy(this.bufcolumn, this.tokenBegin, newbufcolumn, 0, this.bufsize - this.tokenBegin);
+ System.arraycopy(this.bufcolumn, 0, newbufcolumn, this.bufsize - this.tokenBegin, this.bufpos);
+ this.bufcolumn = newbufcolumn;
- maxNextCharInd = (bufpos += (bufsize - tokenBegin));
+ this.maxNextCharInd = (this.bufpos += (this.bufsize - this.tokenBegin));
} else {
- System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
- buffer = newbuffer;
+ System.arraycopy(this.buffer, this.tokenBegin, newbuffer, 0, this.bufsize - this.tokenBegin);
+ this.buffer = newbuffer;
- System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
- bufline = newbufline;
+ System.arraycopy(this.bufline, this.tokenBegin, newbufline, 0, this.bufsize - this.tokenBegin);
+ this.bufline = newbufline;
- System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
- bufcolumn = newbufcolumn;
+ System.arraycopy(this.bufcolumn, this.tokenBegin, newbufcolumn, 0, this.bufsize - this.tokenBegin);
+ this.bufcolumn = newbufcolumn;
- maxNextCharInd = (bufpos -= tokenBegin);
+ this.maxNextCharInd = (this.bufpos -= this.tokenBegin);
}
} catch (Throwable t) {
throw new Error(t.getMessage());
}
- bufsize += 2048;
- available = bufsize;
- tokenBegin = 0;
+ this.bufsize += 2048;
+ this.available = this.bufsize;
+ this.tokenBegin = 0;
}
- private final void FillBuff() throws java.io.IOException {
- if (maxNextCharInd == available) {
- if (available == bufsize) {
- if (tokenBegin > 2048) {
- bufpos = maxNextCharInd = 0;
- available = tokenBegin;
- } else if (tokenBegin < 0) {
- bufpos = maxNextCharInd = 0;
+ private final void fillBuff() throws java.io.IOException {
+ if (this.maxNextCharInd == this.available) {
+ if (this.available == this.bufsize) {
+ if (this.tokenBegin > 2048) {
+ this.bufpos = this.maxNextCharInd = 0;
+ this.available = this.tokenBegin;
+ } else if (this.tokenBegin < 0) {
+ this.bufpos = this.maxNextCharInd = 0;
} else {
- ExpandBuff(false);
+ rxpandBuff(false);
}
- } else if (available > tokenBegin) {
- available = bufsize;
- } else if ((tokenBegin - available) < 2048) {
- ExpandBuff(true);
+ } else if (this.available > this.tokenBegin) {
+ this.available = this.bufsize;
+ } else if ((this.tokenBegin - this.available) < 2048) {
+ rxpandBuff(true);
} else {
- available = tokenBegin;
+ this.available = this.tokenBegin;
}
}
int i;
try {
- if ((i = inputStream.read(buffer, maxNextCharInd, available - maxNextCharInd)) == -1) {
- inputStream.close();
+ if ((i = this.inputStream.read(this.buffer, this.maxNextCharInd, this.available - this.maxNextCharInd)) == -1) {
+ this.inputStream.close();
throw new java.io.IOException();
- } else {
- maxNextCharInd += i;
}
+ this.maxNextCharInd += i;
return;
} catch (java.io.IOException e) {
- --bufpos;
+ --this.bufpos;
backup(0);
- if (tokenBegin == -1) {
- tokenBegin = bufpos;
+ if (this.tokenBegin == -1) {
+ this.tokenBegin = this.bufpos;
}
throw e;
@@ -198,51 +199,49 @@
}
/**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- *
- * @throws java.io.IOException DOCUMENT ME!
+ * Begin Token callback
+ * @return A character
+ * @throws IOException if an IO error occurs
*/
- public final char BeginToken() throws java.io.IOException {
- tokenBegin = -1;
+ public final char beginToken() throws IOException {
+ this.tokenBegin = -1;
char c = readChar();
- tokenBegin = bufpos;
+ this.tokenBegin = this.bufpos;
return c;
}
- private final void UpdateLineColumn(char c) {
- column++;
+ private final void updateLineColumn(char c) {
+ this.column++;
- if (prevCharIsLF) {
- prevCharIsLF = false;
- line += (column = 1);
- } else if (prevCharIsCR) {
- prevCharIsCR = false;
+ if (this.prevCharIsLF) {
+ this.prevCharIsLF = false;
+ this.line += (this.column = 1);
+ } else if (this.prevCharIsCR) {
+ this.prevCharIsCR = false;
if (c == '\n') {
- prevCharIsLF = true;
+ this.prevCharIsLF = true;
} else {
- line += (column = 1);
+ this.line += (this.column = 1);
}
}
switch (c) {
case '\r':
- prevCharIsCR = true;
+ this.prevCharIsCR = true;
break;
case '\n':
- prevCharIsLF = true;
+ this.prevCharIsLF = true;
break;
case '\t':
- column--;
- column += (8 - (column & 07));
+ this.column--;
+ this.column += (8 - (this.column & 07));
break;
@@ -250,246 +249,224 @@
break;
}
- bufline[bufpos] = line;
- bufcolumn[bufpos] = column;
+ this.bufline[this.bufpos] = this.line;
+ this.bufcolumn[this.bufpos] = this.column;
}
/**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- *
- * @throws java.io.IOException DOCUMENT ME!
+ * Read the next character
+ * @return The character
+ * @throws IOException if an IO error occurs
*/
- public final char readChar() throws java.io.IOException {
- if (inBuf > 0) {
- --inBuf;
+ public final char readChar() throws IOException {
+ if (this.inBuf > 0) {
+ --this.inBuf;
- if (++bufpos == bufsize) {
- bufpos = 0;
+ if (++this.bufpos == this.bufsize) {
+ this.bufpos = 0;
}
- return buffer[bufpos];
+ return this.buffer[this.bufpos];
}
- if (++bufpos >= maxNextCharInd) {
- FillBuff();
+ if (++this.bufpos >= this.maxNextCharInd) {
+ fillBuff();
}
- char c = buffer[bufpos];
+ char c = this.buffer[this.bufpos];
- UpdateLineColumn(c);
+ updateLineColumn(c);
return (c);
}
/**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- *
+ * Get the column position
+ * @return The position
* @see #getEndColumn
* @deprecated
*/
public final int getColumn() {
- return bufcolumn[bufpos];
+ return this.bufcolumn[this.bufpos];
}
/**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- *
+ * Get the line number
+ * @return The line number
* @see #getEndLine
* @deprecated
*/
public final int getLine() {
- return bufline[bufpos];
+ return this.bufline[this.bufpos];
}
/**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
+ * Get the column position
+ * @return The position
*/
public final int getEndColumn() {
- return bufcolumn[bufpos];
+ return this.bufcolumn[this.bufpos];
}
/**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
+ * Get the line number
+ * @return The line number
*/
public final int getEndLine() {
- return bufline[bufpos];
+ return this.bufline[this.bufpos];
}
/**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
+ * Get the column at which the current token begins
+ * @return The column recorded for the first character of the token
*/
public final int getBeginColumn() {
- return bufcolumn[tokenBegin];
+ return this.bufcolumn[this.tokenBegin];
}
/**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
+ * Get the line on which the current token begins
+ * @return The line recorded for the first character of the token
*/
public final int getBeginLine() {
- return bufline[tokenBegin];
+ return this.bufline[this.tokenBegin];
}
/**
- * DOCUMENT ME!
- *
- * @param amount DOCUMENT ME!
+ * Go backwards in the buffer
+ * @param amount The amount to go backwards
*/
public final void backup(int amount) {
- inBuf += amount;
+ this.inBuf += amount;
- if ((bufpos -= amount) < 0) {
- bufpos += bufsize;
+ if ((this.bufpos -= amount) < 0) {
+ this.bufpos += this.bufsize;
}
}
/**
- * DOCUMENT ME!
- *
- * @param dstream DOCUMENT ME!
- * @param startline DOCUMENT ME!
- * @param startcolumn DOCUMENT ME!
- * @param buffersize DOCUMENT ME!
+ * Reinitialize the Parser
+ * @param dstream The reader
+ * @param startline The starting line
+ * @param startcolumn The starting column
+ * @param buffersize The buffer size
*/
- public void ReInit(java.io.Reader dstream, int startline, int startcolumn, int buffersize) {
- inputStream = dstream;
- line = startline;
- column = startcolumn - 1;
+ public void reInit(java.io.Reader dstream, int startline, int startcolumn, int buffersize) {
+ this.inputStream = dstream;
+ this.line = startline;
+ this.column = startcolumn - 1;
- if ((buffer == null) || (buffersize != buffer.length)) {
- available = bufsize = buffersize;
- buffer = new char[buffersize];
- bufline = new int[buffersize];
- bufcolumn = new int[buffersize];
+ if ((this.buffer == null) || (buffersize != this.buffer.length)) {
+ this.available = this.bufsize = buffersize;
+ this.buffer = new char[buffersize];
+ this.bufline = new int[buffersize];
+ this.bufcolumn = new int[buffersize];
}
- prevCharIsLF = prevCharIsCR = false;
- tokenBegin = inBuf = maxNextCharInd = 0;
- bufpos = -1;
+ this.prevCharIsLF = this.prevCharIsCR = false;
+ this.tokenBegin = this.inBuf = this.maxNextCharInd = 0;
+ this.bufpos = -1;
}
/**
- * DOCUMENT ME!
- *
- * @param dstream DOCUMENT ME!
- * @param startline DOCUMENT ME!
- * @param startcolumn DOCUMENT ME!
+ * Reinitialize the parser
+ * @param dstream The reader
+ * @param startline The starting line
+ * @param startcolumn The starting column
*/
- public void ReInit(java.io.Reader dstream, int startline, int startcolumn) {
- ReInit(dstream, startline, startcolumn, 4096);
+ public void reInit(java.io.Reader dstream, int startline, int startcolumn) {
+ reInit(dstream, startline, startcolumn, 4096);
}
/**
- * DOCUMENT ME!
- *
- * @param dstream DOCUMENT ME!
+ * Reinitialize the parser
+ * @param reader The reader
*/
- public void ReInit(java.io.Reader dstream) {
- ReInit(dstream, 1, 1, 4096);
+ public void reInit(java.io.Reader reader) {
+ reInit(reader, 1, 1, 4096);
}
/**
- * DOCUMENT ME!
- *
- * @param dstream DOCUMENT ME!
- * @param startline DOCUMENT ME!
- * @param startcolumn DOCUMENT ME!
- * @param buffersize DOCUMENT ME!
+ * Reinitialize the parser
+ * @param dstream The stream
+ * @param startline The starting line
+ * @param startcolumn The starting column
+ * @param buffersize The buffer size
*/
- public void ReInit(java.io.InputStream dstream, int startline, int startcolumn, int buffersize) {
- ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, 4096);
+ public void reInit(java.io.InputStream dstream, int startline, int startcolumn, int buffersize) {
+ reInit(new java.io.InputStreamReader(dstream), startline, startcolumn, 4096);
}
/**
- * DOCUMENT ME!
- *
- * @param dstream DOCUMENT ME!
+ * Reinitialize the parser
+ * @param dstream The stream
*/
- public void ReInit(java.io.InputStream dstream) {
- ReInit(dstream, 1, 1, 4096);
+ public void reInit(java.io.InputStream dstream) {
+ reInit(dstream, 1, 1, 4096);
}
/**
- * DOCUMENT ME!
- *
- * @param dstream DOCUMENT ME!
- * @param startline DOCUMENT ME!
- * @param startcolumn DOCUMENT ME!
+ * Reinitialize the parser
+ * @param dstream The stream
+ * @param startline The starting line
+ * @param startcolumn The starting column
*/
- public void ReInit(java.io.InputStream dstream, int startline, int startcolumn) {
- ReInit(dstream, startline, startcolumn, 4096);
+ public void reInit(java.io.InputStream dstream, int startline, int startcolumn) {
+ reInit(dstream, startline, startcolumn, 4096);
}
/**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
+ * Get the image
+ * @return The image
*/
- public final String GetImage() {
- if (bufpos >= tokenBegin) {
- return new String(buffer, tokenBegin, bufpos - tokenBegin + 1);
- } else {
- return new String(buffer, tokenBegin, bufsize - tokenBegin) +
- new String(buffer, 0, bufpos + 1);
+ public final String getImage() {
+ if (this.bufpos >= this.tokenBegin) {
+ return new String(this.buffer, this.tokenBegin, this.bufpos - this.tokenBegin + 1);
}
+ return new String(this.buffer, this.tokenBegin, this.bufsize - this.tokenBegin) +
+ new String(this.buffer, 0, this.bufpos + 1);
}
/**
- * DOCUMENT ME!
- *
- * @param len DOCUMENT ME!
- *
- * @return DOCUMENT ME!
+ * Get a suffix
+ * @param len The length of the suffix
+ * @return The suffix
*/
- public final char[] GetSuffix(int len) {
+ public final char[] getSuffix(int len) {
char[] ret = new char[len];
- if ((bufpos + 1) >= len) {
- System.arraycopy(buffer, bufpos - len + 1, ret, 0, len);
+ if ((this.bufpos + 1) >= len) {
+ System.arraycopy(this.buffer, this.bufpos - len + 1, ret, 0, len);
} else {
- System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0, len - bufpos - 1);
- System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1);
+ System.arraycopy(this.buffer, this.bufsize - (len - this.bufpos - 1), ret, 0, len - this.bufpos - 1);
+ System.arraycopy(this.buffer, 0, ret, len - this.bufpos - 1, this.bufpos + 1);
}
return ret;
}
/**
- * DOCUMENT ME!
+ * Release the character, line and column buffers by setting them to null
*/
- public void Done() {
- buffer = null;
- bufline = null;
- bufcolumn = null;
+ public void done() {
+ this.buffer = null;
+ this.bufline = null;
+ this.bufcolumn = null;
}
/**
* Method to adjust line and column numbers for the start of a token.<BR>
- *
- * @param newLine DOCUMENT ME!
- * @param newCol DOCUMENT ME!
+ * @param newLine The new line
+ * @param newCol The new column
*/
public void adjustBeginLineColumn(int newLine, int newCol) {
- int start = tokenBegin;
+ int start = this.tokenBegin;
int len;
- if (bufpos >= tokenBegin) {
- len = bufpos - tokenBegin + inBuf + 1;
+ if (this.bufpos >= this.tokenBegin) {
+ len = this.bufpos - this.tokenBegin + this.inBuf + 1;
} else {
- len = bufsize - tokenBegin + bufpos + 1 + inBuf;
+ len = this.bufsize - this.tokenBegin + this.bufpos + 1 + this.inBuf;
}
int i = 0;
@@ -498,28 +475,28 @@
int nextColDiff = 0;
int columnDiff = 0;
- while ((i < len) && (bufline[j = start % bufsize] == bufline[k = ++start % bufsize])) {
- bufline[j] = newLine;
- nextColDiff = (columnDiff + bufcolumn[k]) - bufcolumn[j];
- bufcolumn[j] = newCol + columnDiff;
+ while ((i < len) && (this.bufline[j = start % this.bufsize] == this.bufline[k = ++start % this.bufsize])) {
+ this.bufline[j] = newLine;
+ nextColDiff = (columnDiff + this.bufcolumn[k]) - this.bufcolumn[j];
+ this.bufcolumn[j] = newCol + columnDiff;
columnDiff = nextColDiff;
i++;
}
if (i < len) {
- bufline[j] = newLine++;
- bufcolumn[j] = newCol + columnDiff;
+ this.bufline[j] = newLine++;
+ this.bufcolumn[j] = newCol + columnDiff;
while (i++ < len) {
- if (bufline[j = start % bufsize] != bufline[++start % bufsize]) {
- bufline[j] = newLine++;
+ if (this.bufline[j = start % this.bufsize] != this.bufline[++start % this.bufsize]) {
+ this.bufline[j] = newLine++;
} else {
- bufline[j] = newLine;
+ this.bufline[j] = newLine;
}
}
}
- line = bufline[j];
- column = bufcolumn[j];
+ this.line = this.bufline[j];
+ this.column = this.bufcolumn[j];
}
}
Modified: lenya/trunk/src/java/org/apache/lenya/lucene/html/Token.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/lucene/html/Token.java?view=diff&r1=152681&r2=152682
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/lucene/html/Token.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/lucene/html/Token.java Tue Feb 8 10:13:39 2005
@@ -79,24 +79,22 @@
/**
* Returns the image.
- *
- * @return DOCUMENT ME!
+ * @return The Image
*/
public final String toString() {
- return image;
+ return this.image;
}
/**
* Returns a new Token object, by default. However, if you want, you can create and return
* subclass objects based on the value of ofKind. Simply add the cases to the switch for all
* those special cases. For example, if you have a subclass of Token called IDToken that you
- * want to create if ofKind is ID, simlpy add something like : case MyParserConstants.ID :
+ * want to create if ofKind is ID, simply add something like : case MyParserConstants.ID :
* return new IDToken(); to the following switch statement. Then you can cast matchedToken
* variable to the appropriate type and use it in your lexical actions.
*
- * @param ofKind DOCUMENT ME!
- *
- * @return DOCUMENT ME!
+ * @param ofKind The kind of token
+ * @return The token
*/
public static final Token newToken(int ofKind) {
switch (ofKind) {
Modified: lenya/trunk/src/java/org/apache/lenya/lucene/html/TokenMgrError.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/lucene/html/TokenMgrError.java?view=diff&r1=152681&r2=152682
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/lucene/html/TokenMgrError.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/lucene/html/TokenMgrError.java Tue Feb 8 10:13:39 2005
@@ -19,6 +19,9 @@
package org.apache.lenya.lucene.html;
+/**
+ * The Token Manager Error class
+ */
public class TokenMgrError extends Error {
/*
* Ordinals for various reasons why an Error of this type can be thrown.
@@ -39,46 +42,43 @@
/** Indicates the reason why the exception is thrown. It will have one of the above 4 values. */
int errorCode;
- /*
+ /**
* Constructors of various flavors follow.
*/
public TokenMgrError() {
+ // do nothing
}
/**
* Creates a new TokenMgrError object.
- *
- * @param message DOCUMENT ME!
- * @param reason DOCUMENT ME!
+ * @param message The message
+ * @param reason The error code
*/
public TokenMgrError(String message, int reason) {
super(message);
- errorCode = reason;
+ this.errorCode = reason;
}
/**
* Creates a new TokenMgrError object.
- *
- * @param EOFSeen DOCUMENT ME!
- * @param lexState DOCUMENT ME!
- * @param errorLine DOCUMENT ME!
- * @param errorColumn DOCUMENT ME!
- * @param errorAfter DOCUMENT ME!
- * @param curChar DOCUMENT ME!
- * @param reason DOCUMENT ME!
+ * @param EOFSeen indicates if EOF caused the lexical error
+ * @param lexState lexical state in which this error occurred
+ * @param errorLine line number when the error occurred
+ * @param errorColumn column number when the error occurred
+ * @param errorAfter prefix that was seen before this error occurred
+ * @param curChar the offending character
+ * @param reason The error code
*/
public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn,
String errorAfter, char curChar, int reason) {
- this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
+ this(lexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
/**
* Replaces unprintable characters by their escaped (or unicode escaped) equivalents in the
* given string
- *
- * @param str DOCUMENT ME!
- *
- * @return DOCUMENT ME!
+ * @param str The string
+ * @return The escaped string
*/
protected static final String addEscapes(String str) {
StringBuffer retval = new StringBuffer();
@@ -87,47 +87,38 @@
for (int i = 0; i < str.length(); i++) {
switch (str.charAt(i)) {
case 0:
-
continue;
case '\b':
retval.append("\\b");
-
continue;
case '\t':
retval.append("\\t");
-
continue;
case '\n':
retval.append("\\n");
-
continue;
case '\f':
retval.append("\\f");
-
continue;
case '\r':
retval.append("\\r");
-
continue;
case '\"':
retval.append("\\\"");
-
continue;
case '\'':
retval.append("\\\'");
-
continue;
case '\\':
retval.append("\\\\");
-
continue;
default:
@@ -148,22 +139,16 @@
/**
* Returns a detailed message for the Error when it is thrown by the token manager to indicate
- * a lexical error. Parameters : EOFSeen : indicates if EOF caused the lexicl error
- * curLexState : lexical state in which this error occured errorLine : line number when the
- * error occured errorColumn : column number when the error occured errorAfter : prefix that
- * was seen before this error occured curchar : the offending character Note: You can
- * customize the lexical error message by modifying this method.
- *
- * @param EOFSeen DOCUMENT ME!
- * @param lexState DOCUMENT ME!
- * @param errorLine DOCUMENT ME!
- * @param errorColumn DOCUMENT ME!
- * @param errorAfter DOCUMENT ME!
- * @param curChar DOCUMENT ME!
- *
- * @return DOCUMENT ME!
+ * a lexical error. Note: You can customize the lexical error message by modifying this method.
+ * @param EOFSeen indicates if EOF caused the lexical error
+ * @param lexState lexical state in which this error occurred
+ * @param errorLine line number when the error occurred
+ * @param errorColumn column number when the error occurred
+ * @param errorAfter prefix that was seen before this error occurred
+ * @param curChar the offending character
+ * @return The error message
*/
- private static final String LexicalError(boolean EOFSeen, int lexState, int errorLine,
+ private static final String lexicalError(boolean EOFSeen, int lexState, int errorLine,
int errorColumn, String errorAfter, char curChar) {
return ("Lexical error at line " + errorLine + ", column " + errorColumn +
". Encountered: " +
@@ -177,8 +162,7 @@
* cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not of end-users concern, so you can
* return something like : "Internal Error : Please file a bug report .... " from this method
* for such cases in the release version of your parser.
- *
- * @return DOCUMENT ME!
+ * @return The error message
*/
public String getMessage() {
return super.getMessage();
Modified: lenya/trunk/src/java/org/apache/lenya/lucene/index/AbstractDocumentCreator.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/lucene/index/AbstractDocumentCreator.java?view=diff&r1=152681&r2=152682
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/lucene/index/AbstractDocumentCreator.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/lucene/index/AbstractDocumentCreator.java Tue Feb 8 10:13:39 2005
@@ -20,32 +20,34 @@
package org.apache.lenya.lucene.index;
import java.io.File;
+import java.io.IOException;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-import org.apache.log4j.Category;
+import org.apache.log4j.Logger;
+/**
+ * Abstract document creator
+ */
public class AbstractDocumentCreator implements DocumentCreator {
- Category log = Category.getInstance(AbstractDocumentCreator.class);
+ private static final Logger log = Logger.getLogger(AbstractDocumentCreator.class);
/** Creates a new instance of AbstractDocumentCreator */
public AbstractDocumentCreator() {
+ // do nothing
}
/**
- * DOCUMENT ME!
- *
- * @param file DOCUMENT ME!
- * @param htdocsDumpDir DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- *
- * @throws Exception DOCUMENT ME!
+ * Returns a Lucene document
+ * @param file The file
+ * @param htdocsDumpDir The dump directory
+ * @return The Lucene document
+ * @throws IOException if an error occurs
*/
- public Document getDocument(File file, File htdocsDumpDir)
- throws Exception {
+ public Document getDocument(File file, File htdocsDumpDir) throws IOException {
+
// make a new, empty document
Document doc = new Document();
@@ -77,14 +79,14 @@
doc.add(Field.Keyword("modified", DateField.timeToString(file.lastModified())));
// Add the id as a field, so that index can be incrementally maintained.
- String id = IndexIterator.createID(file, htdocsDumpDir);
+ String id = IndexIterator.createID(file, htdocsDumpDir);
log.debug(id);
doc.add(Field.Keyword("id", id));
// Add the uid as a field, so that index can be incrementally maintained.
// This field is not stored with document, it is indexed, but it is not
// tokenized prior to indexing.
- String uid = IndexIterator.createUID(file, htdocsDumpDir);
+ String uid = IndexIterator.createUID(file, htdocsDumpDir);
log.debug(uid);
doc.add(new Field("uid", uid, false, true, false));
Modified: lenya/trunk/src/java/org/apache/lenya/lucene/index/AbstractIndexIteratorHandler.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/lucene/index/AbstractIndexIteratorHandler.java?view=diff&r1=152681&r2=152682
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/lucene/index/AbstractIndexIteratorHandler.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/lucene/index/AbstractIndexIteratorHandler.java Tue Feb 8 10:13:39 2005
@@ -24,32 +24,46 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+/**
+ * Abstract base class for Index handling
+ */
public abstract class AbstractIndexIteratorHandler implements IndexIteratorHandler {
/** Creates a new instance of AbstractIndexIteratorHandler */
public AbstractIndexIteratorHandler() {
+ // do nothing
}
/** Handles a stale document.
- *
+ * @param reader The index reader
+ * @param term The term
*/
public void handleStaleDocument(IndexReader reader, Term term) {
+ // do nothing
}
/** Handles an unmodified document and the file that represents it.
- *
+ * @param reader The reader
+ * @param term The term
+ * @param file The file
*/
public void handleUnmodifiedDocument(IndexReader reader, Term term, File file) {
+ // do nothing
}
/** Handles a new document and the file that represents it.
- *
+ * @param reader The reader
+ * @param term The term
+ * @param file The file
*/
public void handleNewDocument(IndexReader reader, Term term, File file) {
+ // do nothing
}
/** Handles a file. This is called for every file and mainly used for creating a new index.
- *
+ * @param reader The reader
+ * @param file The file
*/
public void handleFile(IndexReader reader, File file) {
+ // do nothing
}
}
Modified: lenya/trunk/src/java/org/apache/lenya/lucene/index/AbstractIndexer.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/lucene/index/AbstractIndexer.java?view=diff&r1=152681&r2=152682
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/lucene/index/AbstractIndexer.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/lucene/index/AbstractIndexer.java Tue Feb 8 10:13:39 2005
@@ -24,7 +24,7 @@
import java.io.IOException;
import java.util.Arrays;
-import org.apache.log4j.Category;
+import org.apache.log4j.Logger;
import org.apache.lenya.lucene.IndexConfiguration;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
@@ -36,11 +36,11 @@
/**
* Abstract base class for indexers.
- * The factory method {@link #getDocumentCreator(String[])} is used to create a
+ * The factory method {@link #getDocumentCreator} is used to create a
* DocumentCreator from the command-line arguments.
*/
public abstract class AbstractIndexer implements Indexer {
- private static Category log = Category.getInstance(AbstractIndexer.class);
+ static Logger log = Logger.getLogger(AbstractIndexer.class);
private DocumentCreator documentCreator;
private Element indexer;
@@ -50,34 +50,37 @@
* Creates a new instance of AbstractIndexer
*/
public AbstractIndexer() {
+ // do nothing
}
/**
* Returns the DocumentCreator of this indexer.
+ * @return The document creator
*/
protected DocumentCreator getDocumentCreator() {
- return documentCreator;
+ return this.documentCreator;
}
/**
* Initializes this indexer with command-line parameters.
+ * @param _indexer The indexer
+ * @param _configFileName The config file name
+ * @throws IOException
*/
- public void configure(Element indexer, String configFileName) throws Exception {
- documentCreator = createDocumentCreator(indexer, configFileName);
- this.indexer = indexer;
- this.configFileName = configFileName;
+ public void configure(Element _indexer, String _configFileName) throws IOException {
+ this.documentCreator = createDocumentCreator(_indexer, _configFileName);
+ this.indexer = _indexer;
+ this.configFileName = _configFileName;
}
/**
- * DOCUMENT ME!
- *
- * @param element DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- *
- * @throws Exception DOCUMENT ME!
+ * Creates the document creator
+ * @param _indexer The indexer
+ * @param _configFileName The config file name
+ * @return The document creator
+ * @throws IOException if an error occurs
*/
- public abstract DocumentCreator createDocumentCreator(Element indexer, String configFileName) throws Exception;
+ public abstract DocumentCreator createDocumentCreator(Element _indexer, String _configFileName) throws IOException;
/**
* Updates the index incrementally.
@@ -88,23 +91,27 @@
* <li>unchanged documents, to be left alone, or</li>
* <li>new documents, to be indexed.</li>
* </ol>
+ * @param dumpDirectory
+ * @param index
+ * @throws IOException
*/
- public void updateIndex(File dumpDirectory, File index) throws Exception {
+ public void updateIndex(File dumpDirectory, File index) throws IOException {
deleteStaleDocuments(dumpDirectory, index);
doIndex(dumpDirectory, index, false);
}
/**
- * Updates the index re one document
- *
+ * Updates the index for the document specified
* <ol>
* <li>old documents to be deleted</li>
* <li>unchanged documents, to be left alone, or</li>
* <li>new documents, to be indexed.</li>
* </ol>
+ * @param file The document
+ * @throws IOException if an error occurs
*/
- public void indexDocument(File file) throws Exception {
- IndexConfiguration config = new IndexConfiguration(configFileName);
+ public void indexDocument(File file) throws IOException {
+ IndexConfiguration config = new IndexConfiguration(this.configFileName);
log.debug("File: " + file);
File dumpDir = new File(config.resolvePath(config.getHTDocsDumpDir()));
@@ -149,29 +156,31 @@
/**
* Creates a new index.
+ * @param dumpDirectory The dump directory to use
+ * @param index The index
+ * @throws IOException if an error occurs
*/
public void createIndex(File dumpDirectory, File index)
- throws Exception {
+ throws IOException {
doIndex(dumpDirectory, index, true);
}
/**
* Index files
- *
* @param dumpDirectory Directory where the files to be indexed are located
* @param index Directory where the index shall be located
* @param create <strong>true</strong> means the index will be created from scratch, <strong>false</strong> means it will be indexed incrementally
+ * @throws IOException if an error occurs
*/
- public void doIndex(File dumpDirectory, File index, boolean create) {
+ public void doIndex(File dumpDirectory, File index, boolean create) throws IOException {
if (!index.isDirectory()) {
index.mkdirs();
log.warn("Directory has been created: " + index.getAbsolutePath());
}
- try {
IndexWriter writer = new IndexWriter(index.getAbsolutePath(), new StandardAnalyzer(), create);
writer.maxFieldLength = 1000000;
- IndexInformation info = new IndexInformation(index.getAbsolutePath(), dumpDirectory, getFilter(indexer, configFileName), create);
+ IndexInformation info = new IndexInformation(index.getAbsolutePath(), dumpDirectory, getFilter(this.indexer, this.configFileName), create);
IndexHandler handler;
@@ -181,34 +190,35 @@
handler = new UpdateIndexHandler(dumpDirectory, info, writer);
}
- IndexIterator iterator = new IndexIterator(index.getAbsolutePath(), getFilter(indexer, configFileName));
+ IndexIterator iterator = new IndexIterator(index.getAbsolutePath(), getFilter(this.indexer, this.configFileName));
iterator.addHandler(handler);
iterator.iterate(dumpDirectory);
writer.optimize();
writer.close();
- } catch (IOException e) {
- log.error(e);
- }
}
/**
- * Delete the stale documents.
+ * Delete stale documents.
+ * @param _dumpDirectory The dump directory to use
+ * @param _index The index
*/
- protected void deleteStaleDocuments(File dumpDirectory, File index)
- throws Exception {
+ protected void deleteStaleDocuments(File _dumpDirectory, File _index) {
log.debug("Deleting stale documents");
- IndexIterator iterator = new IndexIterator(index.getAbsolutePath(), getFilter(indexer, configFileName));
+ IndexIterator iterator = new IndexIterator(_index.getAbsolutePath(), getFilter(this.indexer, this.configFileName));
iterator.addHandler(new DeleteHandler());
- iterator.iterate(dumpDirectory);
+ iterator.iterate(_dumpDirectory);
log.debug("Deleting stale documents finished");
}
/**
- * Returns the filter used to receive the indexable files. Might be overwritten by inherited class.
+ * Returns the filter used to receive the indexable files. May be overwritten by inherited class.
+ * @param _indexer The indexer
+ * @param _configFileName The name of the configuration file
+ * @return The filter
*/
- public FileFilter getFilter(Element indexer, String configFileName) {
+ public FileFilter getFilter(Element _indexer, String _configFileName) {
String[] indexableExtensions = { "html", "htm", "txt" };
return new AbstractIndexer.DefaultIndexFilter(indexableExtensions);
}
@@ -224,20 +234,22 @@
*/
public DefaultIndexFilter() {
String[] iE = { "html", "htm", "txt" };
- indexableExtensions = iE;
+ this.indexableExtensions = iE;
}
/**
+ * Constructor
+ * @param _indexableExtensions Array of extensions
*
*/
- public DefaultIndexFilter(String[] indexableExtensions) {
- this.indexableExtensions = indexableExtensions;
+ public DefaultIndexFilter(String[] _indexableExtensions) {
+ this.indexableExtensions = _indexableExtensions;
}
- /** Tests whether or not the specified abstract pathname should be
+ /** Tests whether or not the specified file should be
* included in a pathname list.
+ * @param file The file to be tested
*
- * @param pathname The abstract pathname to be tested
* @return <code>true</code> if and only if <code>file</code> should be included
*
*/
@@ -249,7 +261,7 @@
} else {
String fileName = file.getName();
String extension = fileName.substring(fileName.lastIndexOf(".") + 1);
- accept = Arrays.asList(indexableExtensions).contains(extension);
+ accept = Arrays.asList(this.indexableExtensions).contains(extension);
}
return accept;
@@ -266,14 +278,16 @@
*/
public class DeleteHandler extends AbstractIndexIteratorHandler {
/** Handles a stale document.
+ * @param _reader The reader
+ * @param _term The term
*
*/
- public void handleStaleDocument(IndexReader reader, Term term) {
+ public void handleStaleDocument(IndexReader _reader, Term _term) {
log.debug("deleting " +
- IndexIterator.uid2url(term.text()));
+ IndexIterator.uid2url(_term.text()));
try {
- int deletedDocuments = reader.delete(term);
+ int deletedDocuments = _reader.delete(_term);
log.debug("deleted " + deletedDocuments +
" documents.");
} catch (IOException e) {
@@ -283,68 +297,69 @@
}
/**
- * DOCUMENT ME!
+ * The index handler
*/
public class IndexHandler extends AbstractIndexIteratorHandler {
/**
* Creates a new IndexHandler object.
*
- * @param dumpDirectory DOCUMENT ME!
- * @param info DOCUMENT ME!
- * @param writer DOCUMENT ME!
+ * @param _dumpDirectory The dump directory
+ * @param _info The index information
+ * @param _writer The index writer
*/
- public IndexHandler(File dumpDirectory, IndexInformation info, IndexWriter writer) {
- this.info = info;
- this.dumpDirectory = dumpDirectory;
- this.writer = writer;
+ public IndexHandler(File _dumpDirectory, IndexInformation _info, IndexWriter _writer) {
+ this.info = _info;
+ this.dumpDirectory = _dumpDirectory;
+ this.writer = _writer;
}
private IndexInformation info;
protected IndexInformation getInformation() {
- return info;
+ return this.info;
}
private File dumpDirectory;
protected File getDumpDirectory() {
- return dumpDirectory;
+ return this.dumpDirectory;
}
private IndexWriter writer;
protected IndexWriter getWriter() {
- return writer;
+ return this.writer;
}
/**
- * Add document to index
- */
+ * Add document to index
+ * @param file The file to add
+ */
protected void addFile(File file) {
log.debug("adding document: " + file.getAbsolutePath());
try {
- Document doc = getDocumentCreator().getDocument(file, dumpDirectory);
- writer.addDocument(doc);
+ Document doc = getDocumentCreator().getDocument(file, this.dumpDirectory);
+ this.writer.addDocument(doc);
} catch (Exception e) {
log.error(e);
}
- info.increase();
- log.info(info.printProgress());
+ this.info.increase();
+ log.info(this.info.printProgress());
}
}
/**
- * DOCUMENT ME!
+ * Index handler used when creating a new index
*/
public class CreateIndexHandler extends IndexHandler {
/**
* Creates a new CreateIndexHandler object.
*
- * @param dumpDirectory DOCUMENT ME!
- * @param info DOCUMENT ME!
- * @param writer DOCUMENT ME!
+ * @param dumpDirectory The dump directory to use
+ * @param info The index information
+ * @param writer The index writer
*/
public CreateIndexHandler(File dumpDirectory, IndexInformation info, IndexWriter writer) {
super(dumpDirectory, info, writer);
@@ -352,6 +367,8 @@
/**
* Handles a file. Used when creating a new index.
+ * @param reader The reader
+ * @param file The file
*/
public void handleFile(IndexReader reader, File file) {
addFile(file);
@@ -359,15 +376,15 @@
}
/**
- * DOCUMENT ME!
+ * Class to update the index
*/
public class UpdateIndexHandler extends IndexHandler {
/**
* Creates a new UpdateIndexHandler object.
*
- * @param dumpDirectory DOCUMENT ME!
- * @param info DOCUMENT ME!
- * @param writer DOCUMENT ME!
+ * @param dumpDirectory The dump directory to use
+ * @param info The index information
+ * @param writer The index writer
*/
public UpdateIndexHandler(File dumpDirectory, IndexInformation info, IndexWriter writer) {
super(dumpDirectory, info, writer);
@@ -375,6 +392,9 @@
/**
* Handles a new document. Used when updating the index.
+ * @param reader The index reader
+ * @param term The term
+ * @param file The file
*/
public void handleNewDocument(IndexReader reader, Term term, File file) {
addFile(file);
Modified: lenya/trunk/src/java/org/apache/lenya/lucene/index/ConfigurableDocumentCreator.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/lenya/lucene/index/ConfigurableDocumentCreator.java?view=diff&r1=152681&r2=152682
==============================================================================
--- lenya/trunk/src/java/org/apache/lenya/lucene/index/ConfigurableDocumentCreator.java (original)
+++ lenya/trunk/src/java/org/apache/lenya/lucene/index/ConfigurableDocumentCreator.java Tue Feb 8 10:13:39 2005
@@ -26,13 +26,19 @@
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
+import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.FactoryConfigurationError;
+import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
@@ -42,57 +48,61 @@
import org.apache.lenya.lucene.parser.StringCleaner;
import org.apache.lenya.xml.DocumentHelper;
import org.apache.lenya.xml.NamespaceHelper;
-import org.apache.log4j.Category;
+import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
/**
* Uses XSLT to transform a XML into a Lucene document
*/
public class ConfigurableDocumentCreator extends AbstractDocumentCreator {
- Category log = Category.getInstance(ConfigurableDocumentCreator.class);
+ private static final Logger log = Logger.getLogger(ConfigurableDocumentCreator.class);
+ /**
+ * <code>LUCENE_NAMESPACE</code> The Lucene namespace
+ */
public static final String LUCENE_NAMESPACE = "http://apache.org/cocoon/lenya/lucene/1.0";
+ /**
+ * <code>XHTML_NAMESPACE</code> The XHTML namespace
+ */
public static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml";
/**
* Creates a new ConfigurableDocumentCreator object.
*
- * @param stylesheet DOCUMENT ME!
+ * @param _stylesheet The stylesheet to use to transform the Document into a Lucene document
*/
- public ConfigurableDocumentCreator(String stylesheet) {
- this.stylesheet = stylesheet;
+ public ConfigurableDocumentCreator(String _stylesheet) {
+ this.stylesheet = _stylesheet;
}
private String stylesheet;
/**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
+ * Get the stylesheet
+ * @return The stylesheet
*/
public String getStylesheet() {
- return stylesheet;
+ return this.stylesheet;
}
/**
* Transform source document into lucene document and generate a Lucene Document instance
*
- * @param file DOCUMENT ME!
- * @param htdocsDumpDir DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- *
- * @throws Exception DOCUMENT ME!
+ * @param file The file
+ * @param htdocsDumpDir The dump directory
+ * @return The Lucene document
+ * @throws IOException if an error occurs
*/
- public Document getDocument(File file, File htdocsDumpDir) throws Exception {
+ public Document getDocument(File file, File htdocsDumpDir) throws IOException {
log.debug(".getDocument() : indexing " + file.getAbsolutePath());
- try {
+ try {
org.w3c.dom.Document sourceDocument = null;
DocumentBuilderFactory parserFactory = DocumentBuilderFactory.newInstance();
parserFactory.setValidating(false);
@@ -104,19 +114,16 @@
// FIXME: What is this good for: <?xml version="1.0"?><body>...</body>
/*
- NamespaceHelper documentHelper = new NamespaceHelper(XHTML_NAMESPACE, "xhtml", "html");
- org.w3c.dom.Document sourceDocument = documentHelper.getDocument();
+ NamespaceHelper documentHelper = new NamespaceHelper(XHTML_NAMESPACE, "xhtml", "html");
+ org.w3c.dom.Document sourceDocument = documentHelper.getDocument();
- Element rootNode = sourceDocument.getDocumentElement();
+ Element rootNode = sourceDocument.getDocumentElement();
- String bodyText = getBodyText(file);
- Element bodyElement = documentHelper.createElement("body", bodyText);
- rootNode.appendChild(bodyElement);
+ String bodyText = getBodyText(file);
+ Element bodyElement = documentHelper.createElement("body", bodyText);
+ rootNode.appendChild(bodyElement);
*/
-
-
-
DOMSource documentSource = new DOMSource(sourceDocument);
Writer documentWriter = new StringWriter();
@@ -163,31 +170,62 @@
}
return document;
- } catch (Exception e) {
- throw e;
+ } catch (final TransformerConfigurationException e) {
+ throw new IOException(e.toString());
+ } catch (final IllegalArgumentException e) {
+ throw new IOException(e.toString());
+ } catch (final SecurityException e) {
+ throw new IOException(e.toString());
+ } catch (final FactoryConfigurationError e) {
+ throw new IOException(e.toString());
+ } catch (final ParserConfigurationException e) {
+ throw new IOException(e.toString());
+ } catch (final SAXException e) {
+ throw new IOException(e.toString());
+ } catch (final IOException e) {
+ throw new IOException(e.toString());
+ } catch (final TransformerFactoryConfigurationError e) {
+ throw new IOException(e.toString());
+ } catch (final TransformerException e) {
+ throw new IOException(e.toString());
+ } catch (final NoSuchMethodException e) {
+ throw new IOException(e.toString());
+ } catch (final IllegalAccessException e) {
+ throw new IOException(e.toString());
+ } catch (final InvocationTargetException e) {
+ throw new IOException(e.toString());
}
}
/**
* Writes the lucene XML document to a file.
+ * @param file The file
+ * @param writer The writer
+ * @throws IOException if an IO error occurs
*/
protected void dumpLuceneDocument(File file, Writer writer) throws IOException {
- log.debug(".dumpLuceneDocument(): Dump document: " + file.getAbsolutePath());
+ FileWriter fileWriter = null;
+
+ try {
+ log.debug(".dumpLuceneDocument(): Dump document: " + file.getAbsolutePath());
- File luceneDocumentFile = new File(file.getAbsolutePath() + ".xluc");
- luceneDocumentFile.createNewFile();
+ File luceneDocumentFile = new File(file.getAbsolutePath() + ".xluc");
+ if (luceneDocumentFile.createNewFile()) log.debug("new lucene file created.");
- FileWriter fileWriter = new FileWriter(luceneDocumentFile);
- fileWriter.write(writer.toString());
- fileWriter.close();
+ fileWriter = new FileWriter(luceneDocumentFile);
+ fileWriter.write(writer.toString());
+ } catch (IOException e) {
+ log.error("IO Error " +e.toString());
+ } finally {
+ if (fileWriter != null)
+ fileWriter.close();
+ }
}
/**
- * DOCUMENT ME!
- *
- * @param node DOCUMENT ME!
- *
- * @return DOCUMENT ME!
+ * Get the text of a node
+ * @param node The node
+ * @return The text of the node
*/
public static String getText(Node node) {
StringBuffer result = new StringBuffer();
@@ -211,18 +249,14 @@
result.append(getText(subnode));
}
}
-
return result.toString();
}
/**
- * DOCUMENT ME!
- *
- * @param file DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- *
- * @throws Exception DOCUMENT ME!
+ * Get the body text of a file (if that file happens to be HTML)
+ * @param file The file
+ * @return The body text
+ * @throws Exception if an error occurs
*/
public static String getBodyText(File file) throws Exception {
HTMLParser parser = HTMLParserFactory.newInstance(file);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@lenya.apache.org
For additional commands, e-mail: commits-help@lenya.apache.org