You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2011/10/14 17:55:52 UTC
svn commit: r1183398 - in /incubator/lcf/trunk: CHANGES.txt
connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
tests/wiki/src/test/java/org/apache/manifoldcf/wiki_tests/MockWikiService.java
Author: kwright
Date: Fri Oct 14 15:55:52 2011
New Revision: 1183398
URL: http://svn.apache.org/viewvc?rev=1183398&view=rev
Log:
Fix for CONNECTORS-273. Added last-modified metadata to the Wiki connector.
Modified:
incubator/lcf/trunk/CHANGES.txt
incubator/lcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
incubator/lcf/trunk/tests/wiki/src/test/java/org/apache/manifoldcf/wiki_tests/MockWikiService.java
Modified: incubator/lcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/CHANGES.txt?rev=1183398&r1=1183397&r2=1183398&view=diff
==============================================================================
--- incubator/lcf/trunk/CHANGES.txt (original)
+++ incubator/lcf/trunk/CHANGES.txt Fri Oct 14 15:55:52 2011
@@ -3,6 +3,10 @@ $Id$
======================= 0.4-dev =====================
+CONNECTORS-273: Add last-modified metadata to indexing for the
+Wiki connector.
+(Tobias Wunderlich, Karl Wright)
+
CONNECTORS-274: Fix long-standing problem with XML parsing, which
affected the wiki connector in a big way.
(Tobias Wunderlich, Karl Wright)
Modified: incubator/lcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java?rev=1183398&r1=1183397&r2=1183398&view=diff
==============================================================================
--- incubator/lcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java (original)
+++ incubator/lcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java Fri Oct 14 15:55:52 2011
@@ -2054,6 +2054,7 @@ public class WikiConnector extends org.a
String author = t.getAuthor();
String comment = t.getComment();
String title = t.getTitle();
+ String lastModified = t.getLastModified();
RepositoryDocument rd = new RepositoryDocument();
dataSize = contentFile.length();
@@ -2067,6 +2068,8 @@ public class WikiConnector extends org.a
rd.addField("author",author);
if (title != null)
rd.addField("title",title);
+ if (lastModified != null)
+ rd.addField("last-modified",lastModified);
activities.ingestDocument(documentIdentifier,documentVersion,fullURL,rd);
}
finally
@@ -2157,7 +2160,7 @@ public class WikiConnector extends org.a
}
}
- /** Thread to execute a "get timestamp" operation. This thread both executes the operation and parses the result. */
+ /** Thread to execute a "get doc info" operation. This thread both executes the operation and parses the result. */
protected static class ExecuteGetDocInfoThread extends Thread
{
protected HttpClient client;
@@ -2168,6 +2171,7 @@ public class WikiConnector extends org.a
protected String author = null;
protected String title = null;
protected String comment = null;
+ protected String lastModified = null;
protected String statusCode = null;
protected String errorMessage = null;
@@ -2221,6 +2225,7 @@ public class WikiConnector extends org.a
title = c.getTitle();
author = c.getAuthor();
comment = c.getComment();
+ lastModified = c.getLastModified();
statusCode = "OK";
}
catch (IOException e)
@@ -2290,6 +2295,11 @@ public class WikiConnector extends org.a
{
return title;
}
+
+ public String getLastModified()
+ {
+ return lastModified;
+ }
public void cleanup()
{
@@ -2308,7 +2318,7 @@ public class WikiConnector extends org.a
protected String getGetDocInfoURL(String documentIdentifier)
throws ManifoldCFException
{
- return baseURL + "action=query&prop=revisions&pageids="+documentIdentifier+"&rvprop=user%7ccomment%7ccontent";
+ return baseURL + "action=query&prop=revisions&pageids="+documentIdentifier+"&rvprop=user%7ccomment%7ccontent%7ctimestamp";
}
/** Class representing the "api" context of a "get doc info" response */
@@ -2322,6 +2332,8 @@ public class WikiConnector extends org.a
protected String author = null;
/** Comment */
protected String comment = null;
+ /** Last modified */
+ protected String lastModified = null;
public WikiGetDocInfoAPIContext(XMLStream theStream)
{
@@ -2342,6 +2354,7 @@ public class WikiConnector extends org.a
contentFile = pc.getContentFile();
author = pc.getAuthor();
comment = pc.getComment();
+ lastModified = pc.getLastModified();
}
protected void tagCleanup()
@@ -2371,6 +2384,11 @@ public class WikiConnector extends org.a
{
return author;
}
+
+ public String getLastModified()
+ {
+ return lastModified;
+ }
public String getComment()
{
@@ -2390,6 +2408,8 @@ public class WikiConnector extends org.a
protected String author = null;
/** Comment */
protected String comment = null;
+ /** Last modified */
+ protected String lastModified = null;
public WikiGetDocInfoQueryContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
{
@@ -2410,6 +2430,7 @@ public class WikiConnector extends org.a
contentFile = pc.getContentFile();
author = pc.getAuthor();
comment = pc.getComment();
+ lastModified = pc.getLastModified();
}
protected void tagCleanup()
@@ -2439,6 +2460,11 @@ public class WikiConnector extends org.a
{
return author;
}
+
+ public String getLastModified()
+ {
+ return lastModified;
+ }
public String getComment()
{
@@ -2458,6 +2484,8 @@ public class WikiConnector extends org.a
protected String author = null;
/** Comment */
protected String comment = null;
+ /** Last modified */
+ protected String lastModified = null;
public WikiGetDocInfoPagesContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
{
@@ -2477,6 +2505,7 @@ public class WikiConnector extends org.a
title = pc.getTitle();
contentFile = pc.getContentFile();
author = pc.getAuthor();
+ lastModified = pc.getLastModified();
comment = pc.getComment();
}
@@ -2507,6 +2536,11 @@ public class WikiConnector extends org.a
{
return author;
}
+
+ public String getLastModified()
+ {
+ return lastModified;
+ }
public String getComment()
{
@@ -2526,6 +2560,8 @@ public class WikiConnector extends org.a
protected String author = null;
/** Comment */
protected String comment = null;
+ /** Last modified */
+ protected String lastModified = null;
public WikiGetDocInfoPageContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
{
@@ -2556,6 +2592,7 @@ public class WikiConnector extends org.a
contentFile = rc.getContentFile();
author = rc.getAuthor();
comment = rc.getComment();
+ lastModified = rc.getLastModified();
}
super.endTag();
}
@@ -2592,6 +2629,11 @@ public class WikiConnector extends org.a
{
return comment;
}
+
+ public String getLastModified()
+ {
+ return lastModified;
+ }
}
@@ -2601,6 +2643,7 @@ public class WikiConnector extends org.a
protected File contentFile = null;
protected String author = null;
protected String comment = null;
+ protected String lastModified = null;
public WikiGetDocInfoRevisionsContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
{
@@ -2620,6 +2663,7 @@ public class WikiConnector extends org.a
contentFile = rc.getContentFile();
author = rc.getAuthor();
comment = rc.getComment();
+ lastModified = rc.getLastModified();
}
protected void tagCleanup()
@@ -2650,6 +2694,10 @@ public class WikiConnector extends org.a
return comment;
}
+ public String getLastModified()
+ {
+ return lastModified;
+ }
}
/** Class looking for the "api/query/pages/page/revisions/rev" context of a "get doc info" response */
@@ -2658,6 +2706,7 @@ public class WikiConnector extends org.a
protected String author = null;
protected String comment = null;
protected File contentFile = null;
+ protected String lastModified = null;
public WikiGetDocInfoRevContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
{
@@ -2671,6 +2720,7 @@ public class WikiConnector extends org.a
{
author = atts.getValue("user");
comment = atts.getValue("comment");
+ lastModified = atts.getValue("timestamp");
try
{
File tempFile = File.createTempFile("_wikidata_","tmp");
@@ -2723,6 +2773,11 @@ public class WikiConnector extends org.a
{
return author;
}
+
+ public String getLastModified()
+ {
+ return lastModified;
+ }
public String getComment()
{
Modified: incubator/lcf/trunk/tests/wiki/src/test/java/org/apache/manifoldcf/wiki_tests/MockWikiService.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/tests/wiki/src/test/java/org/apache/manifoldcf/wiki_tests/MockWikiService.java?rev=1183398&r1=1183397&r2=1183398&view=diff
==============================================================================
--- incubator/lcf/trunk/tests/wiki/src/test/java/org/apache/manifoldcf/wiki_tests/MockWikiService.java (original)
+++ incubator/lcf/trunk/tests/wiki/src/test/java/org/apache/manifoldcf/wiki_tests/MockWikiService.java Fri Oct 14 15:55:52 2011
@@ -139,7 +139,7 @@ public class MockWikiService
if (resourceName == null)
throw new IOException("Could not find a matching resource for the timestamp parameters; pageids = '"+pageIds+"'");
}
- else if (rvprop != null && rvprop.equals("user|comment|content"))
+ else if (rvprop != null && rvprop.equals("user|comment|content|timestamp"))
{
// Doc info query
if (pageIds == null)
@@ -148,7 +148,7 @@ public class MockWikiService
throw new IOException("cannot do more than one docinfo request at once");
resourceName = docInfoQueryResources.get(pageIds);
if (resourceName == null)
- throw new IOException("Could not find a matching resource for the user|comment|content parameters; pageids = '"+pageIds+"'");
+ throw new IOException("Could not find a matching resource for the user|comment|content|timestamp parameters; pageids = '"+pageIds+"'");
}
else
throw new IOException("rvprop parameter missing or incorrect: "+rvprop);