You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by bu...@apache.org on 2004/09/05 23:55:51 UTC
cvs commit: jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test TestProbeLocator.java
burton 2004/09/05 14:55:51
Modified: feedparser TODO
feedparser/src/java/org/apache/commons/feedparser/locate
ProbeLocator.java
feedparser/src/java/org/apache/commons/feedparser/test
TestProbeLocator.java
Log:
1/2 of Brad's patches
Revision Changes Path
1.13 +6 -0 jakarta-commons-sandbox/feedparser/TODO
Index: TODO
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/TODO,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -r1.12 -r1.13
--- TODO 3 Sep 2004 19:46:47 -0000 1.12
+++ TODO 5 Sep 2004 21:55:51 -0000 1.13
@@ -1,4 +1,10 @@
+- BUG:
+
+ Make SURE that all these tests pass:
+
+ http://diveintomark.org/tests/client/autodiscovery/
+
- BUG: what happens when I put a comment after a UTF-16 BOM?!
- Support Base64 Atom values and the ability to enable them.
1.11 +28 -37 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java
Index: ProbeLocator.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- ProbeLocator.java 2 Sep 2004 00:36:25 -0000 1.10
+++ ProbeLocator.java 5 Sep 2004 21:55:51 -0000 1.11
@@ -69,7 +69,7 @@
/**
* A regex to find any trailing filename and strip it
*/
- private static Pattern patternToStrip = Pattern.compile("/\\w*\\.\\w*$");
+ private static Pattern patternToStrip = Pattern.compile("[^/](/\\w*\\.\\w*$)");
/**
* A regex to extract the user from a Xanga URL
@@ -143,7 +143,8 @@
new FeedReference("index.rss", FeedReference.RSS_MEDIA_TYPE),
new FeedReference("rss.xml", FeedReference.RSS_MEDIA_TYPE),
new FeedReference("index.rdf", FeedReference.RSS_MEDIA_TYPE),
- new FeedReference("index.xml", FeedReference.XML_MEDIA_TYPE) };
+ new FeedReference("index.xml", FeedReference.RSS_MEDIA_TYPE),
+ new FeedReference("xml/rss.xml", FeedReference.RSS_MEDIA_TYPE) };
probeMapping.put( BlogService.BLOSXOM, blosxomLocations );
@@ -192,6 +193,7 @@
mapping = (FeedReference[])probeMapping.get( blogService );
log.info( "mapping = " + mapping );
+ log.info( "baseFeedPath = " + baseFeedPath );
// try out each mapping
for (int i = 0; i < mapping.length; i++) {
@@ -285,7 +287,9 @@
* and gets the path necessary to build up a feed, such as
* "http://www.codinginparadise.org/". Basically, it appends a slash to the end if there
* is not one, and removes any file names that might be at the end, such as
- * "myweblog.php". The one special exception is for some Blosxom blogs,
+ * "myweblog.php".
+ *
+ * There is a special exception for some Blosxom blogs,
* which have things inside of a cgi-script and 'hang' their RSS files
* off of this cgi-bin. For example,
* http://www.bitbucketheaven.com/cgi-bin/blosxom.cgi has its RSS file
@@ -297,31 +301,40 @@
*
* @author Brad Neuberg, bkn3@columbia.edu
*/
- protected static String getFeedPath(String resource)
+ protected static String getFeedPath( String resource )
throws MalformedURLException {
// strip off any query string or anchors
- int startStripAt = resource.indexOf("#");
+ int end = resource.lastIndexOf( "#" );
+
+ if ( end != -1 )
+ resource = resource.substring( 0, end );
- if ( startStripAt == -1 ) {
- startStripAt = resource.indexOf("?");
- }
- if (startStripAt != -1) {
- resource = resource.substring(0, startStripAt);
- }
+ end = resource.lastIndexOf( "?" );
- if (!resource.endsWith("blosxom.cgi")) {
+ if ( end != -1 )
+ resource = resource.substring( 0, end );
+
+ if ( ! resource.endsWith( "blosxom.cgi" ) ) {
Matcher fileMatcher = patternToStrip.matcher(resource);
resource = fileMatcher.replaceAll("");
}
- if ( !resource.endsWith( "/" ) ) {
+ if ( ! resource.endsWith( "/" ) ) {
resource = resource + "/";
}
return resource;
}
-
+
+ public static void main( String[] args ) throws Exception {
+
+ System.out.println( "asdf" );
+ System.out.println( getFeedPath( "http://foo.com/bar?cat=dog" ) );
+ System.out.println( getFeedPath( "http://foo.com/bar?cat=dog#adf" ) );
+
+ }
+
/** Does an HTTP HEAD to see if the given resource exists.
*
* @param resource The full URI to the resource to check for.
@@ -329,13 +342,8 @@
* @author Brad Neuberg, bkn3@columbia.edu
*/
protected static boolean feedExists(String resource) throws Exception {
-
- log.info( "Inside feedExists, resource = "+resource );
-
ResourceRequest request = ResourceRequestFactory.getResourceRequest( resource );
- log.info( "request = "+request );
-
request.setRequestMethod( "HEAD" );
request.setFollowRedirects( false );
@@ -343,7 +351,7 @@
request.getContentLength();
long response = request.getResponseCode();
- log.info( "response = " + response );
+
return response == 200;
}
@@ -356,23 +364,6 @@
xangaMatcher.matches();
return xangaMatcher.group(1);
- }
-
- public static void main( String[] args ) throws Exception {
-
- FeedList list = new FeedList();
-
- locate( "http://davebarry.blogspot.com/", null, list );
- locate( "http://www.livejournal.com/users/jwz", null, list );
-
- Iterator it = list.iterator();
-
- while ( it.hasNext() ) {
-
- log.info( it.next() );
-
- }
-
}
}
1.3 +328 -3 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestProbeLocator.java
Index: TestProbeLocator.java
===================================================================
RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestProbeLocator.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- TestProbeLocator.java 2 Sep 2004 00:36:25 -0000 1.2
+++ TestProbeLocator.java 5 Sep 2004 21:55:51 -0000 1.3
@@ -336,7 +336,7 @@
assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
assertNull(feeds[0].title, null);
assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
- assertEquals(feeds[0].resource, "http://xanga.com/rss.aspx?user=joe");
+ assertEquals(feeds[0].resource, "http://www.xanga.com/rss.aspx?user=lithium98");
/* test through the FeedLocator */
list = FeedLocator.locate(resource);
atomFeed = list.getAdAtomFeed();
@@ -344,7 +344,82 @@
assertNull(atomFeed);
assertNotNull(rssFeed);
assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
- assertEquals(rssFeed.resource, "http://xanga.com/rss.aspx?user=joe");
+ assertEquals(rssFeed.resource, "http://www.xanga.com/rss.aspx?user=lithium98");
+
+ // This site should have an RSS feed
+ resource = "http://www.xanga.com/home.aspx?user=ChUnSA_86";
+ content = getContent(resource);
+ assertNotNull(content);
+ blogService = BlogServiceDiscovery.discover(resource, content);
+ assertEquals(blogService, BlogService.XANGA);
+ list = new FeedList();
+ ProbeLocator.locate(resource, content, list);
+ assertEquals(list.size(), 1);
+ feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
+ assertEquals(feeds.length, 1);
+ assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
+ assertNull(feeds[0].title, null);
+ assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
+ assertEquals(feeds[0].resource, "http://www.xanga.com/rss.aspx?user=ChUnSA_86");
+ /* test through the FeedLocator */
+ list = FeedLocator.locate(resource);
+ atomFeed = list.getAdAtomFeed();
+ rssFeed = list.getAdRSSFeed();
+ assertNull(atomFeed);
+ assertNotNull(rssFeed);
+ assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
+ assertEquals(rssFeed.resource, "http://www.xanga.com/rss.aspx?user=ChUnSA_86");
+
+ // This site should have an RSS feed
+ resource = "http://www.xanga.com/home.aspx?user=wdfphillz";
+ content = getContent(resource);
+ assertNotNull(content);
+ blogService = BlogServiceDiscovery.discover(resource, content);
+ assertEquals(blogService, BlogService.XANGA);
+ list = new FeedList();
+ ProbeLocator.locate(resource, content, list);
+ assertEquals(list.size(), 1);
+ feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
+ assertEquals(feeds.length, 1);
+ assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
+ assertNull(feeds[0].title, null);
+ assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
+ assertEquals(feeds[0].resource, "http://www.xanga.com/rss.aspx?user=wdfphillz");
+ /* test through the FeedLocator */
+ list = FeedLocator.locate(resource);
+ atomFeed = list.getAdAtomFeed();
+ rssFeed = list.getAdRSSFeed();
+ assertNull(atomFeed);
+ assertNotNull(rssFeed);
+ assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
+ assertEquals(rssFeed.resource, "http://www.xanga.com/rss.aspx?user=wdfphillz");
+
+ // This site should have an RSS feed
+ // FIXME: We should be able to pass this test when we
+ // expand resources inside of the Feed Parser; we don't
+ // currently do this yet, Brad Neuberg, bkn3@columbia.edu
+ /*resource = "http://xanga.com/home.aspx?user=joe";
+ content = getContent(resource);
+ assertNotNull(content);
+ blogService = BlogServiceDiscovery.discover(resource, content);
+ assertEquals(blogService, BlogService.XANGA);
+ list = new FeedList();
+ ProbeLocator.locate(resource, content, list);
+ assertEquals(list.size(), 1);
+ feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
+ assertEquals(feeds.length, 1);
+ assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
+ assertNull(feeds[0].title, null);
+ assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
+ assertEquals(feeds[0].resource, "http://xanga.com/rss.aspx?user=joe");*/
+ /* test through the FeedLocator */
+ /*list = FeedLocator.locate(resource);
+ atomFeed = list.getAdAtomFeed();
+ rssFeed = list.getAdRSSFeed();
+ assertNull(atomFeed);
+ assertNotNull(rssFeed);
+ assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
+ assertEquals(rssFeed.resource, "http://xanga.com/rss.aspx?user=joe");*/
}
public FeedList test( String resource,
@@ -681,6 +756,102 @@
public void testPMachine() throws Exception {
FeedList list = null;
String resource = null, content = null;
+
+ // This site should have a single RSS feed
+ resource = "http://bamph.com";
+ content = getContent(resource);
+ assertNotNull(content);
+ blogService = BlogServiceDiscovery.discover(resource, content);
+ assertEquals(blogService, BlogService.UNKNOWN);
+ list = new FeedList();
+ ProbeLocator.locate(resource, content, list);
+ assertEquals(list.size(), 1);
+ feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
+ assertEquals(feeds.length, 1);
+ assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
+ assertNull(feeds[0].title, null);
+ assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
+ assertEquals(feeds[0].resource, "http://bamph.com/index.xml");
+ /* test through the FeedLocator */
+ list = FeedLocator.locate(resource);
+ atomFeed = list.getAdAtomFeed();
+ rssFeed = list.getAdRSSFeed();
+ assertNull(atomFeed);
+ assertNotNull(rssFeed);
+ assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
+ assertEquals(rssFeed.resource, "http://bamph.com/index.xml");
+
+ // This site should have a single RSS feed
+ resource = "http://bamph.com";
+ content = getContent(resource);
+ assertNotNull(content);
+ blogService = BlogServiceDiscovery.discover(resource, content);
+ assertEquals(blogService, BlogService.UNKNOWN);
+ list = new FeedList();
+ ProbeLocator.locate(resource, content, list);
+ assertEquals(list.size(), 1);
+ feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
+ assertEquals(feeds.length, 1);
+ assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
+ assertNull(feeds[0].title, null);
+ assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
+ assertEquals(feeds[0].resource, "http://bamph.com/index.xml");
+ /* test through the FeedLocator */
+ list = FeedLocator.locate(resource);
+ atomFeed = list.getAdAtomFeed();
+ rssFeed = list.getAdRSSFeed();
+ assertNull(atomFeed);
+ assertNotNull(rssFeed);
+ assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
+ assertEquals(rssFeed.resource, "http://bamph.com/index.xml");
+
+ // This site should have a single RSS feed
+ resource = "http://bamph.com";
+ content = getContent(resource);
+ assertNotNull(content);
+ blogService = BlogServiceDiscovery.discover(resource, content);
+ assertEquals(blogService, BlogService.UNKNOWN);
+ list = new FeedList();
+ ProbeLocator.locate(resource, content, list);
+ assertEquals(list.size(), 1);
+ feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
+ assertEquals(feeds.length, 1);
+ assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
+ assertNull(feeds[0].title, null);
+ assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
+ assertEquals(feeds[0].resource, "http://bamph.com/index.xml");
+ /* test through the FeedLocator */
+ list = FeedLocator.locate(resource);
+ atomFeed = list.getAdAtomFeed();
+ rssFeed = list.getAdRSSFeed();
+ assertNull(atomFeed);
+ assertNotNull(rssFeed);
+ assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
+ assertEquals(rssFeed.resource, "http://bamph.com/index.xml");
+
+ // This site should have a single RSS feed
+ resource = "http://bamph.com";
+ content = getContent(resource);
+ assertNotNull(content);
+ blogService = BlogServiceDiscovery.discover(resource, content);
+ assertEquals(blogService, BlogService.UNKNOWN);
+ list = new FeedList();
+ ProbeLocator.locate(resource, content, list);
+ assertEquals(list.size(), 1);
+ feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
+ assertEquals(feeds.length, 1);
+ assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
+ assertNull(feeds[0].title, null);
+ assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
+ assertEquals(feeds[0].resource, "http://bamph.com/index.xml");
+ /* test through the FeedLocator */
+ list = FeedLocator.locate(resource);
+ atomFeed = list.getAdAtomFeed();
+ rssFeed = list.getAdRSSFeed();
+ assertNull(atomFeed);
+ assertNotNull(rssFeed);
+ assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
+ assertEquals(rssFeed.resource, "http://bamph.com/index.xml");
BlogService blogService = null;
FeedReference feeds[] = null;
FeedReference rssFeed, atomFeed;
@@ -756,6 +927,30 @@
assertNotNull(rssFeed);
assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
assertEquals(rssFeed.resource, "http://www.mondfish.net/pmachine/index.xml");
+
+ // This site should have a single RSS feed
+ resource = "http://bamph.com";
+ content = getContent(resource);
+ assertNotNull(content);
+ blogService = BlogServiceDiscovery.discover(resource, content);
+ assertEquals(blogService, BlogService.UNKNOWN);
+ list = new FeedList();
+ ProbeLocator.locate(resource, content, list);
+ assertEquals(list.size(), 1);
+ feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
+ assertEquals(feeds.length, 1);
+ assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
+ assertNull(feeds[0].title, null);
+ assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
+ assertEquals(feeds[0].resource, "http://bamph.com/index.xml");
+ /* test through the FeedLocator */
+ list = FeedLocator.locate(resource);
+ atomFeed = list.getAdAtomFeed();
+ rssFeed = list.getAdRSSFeed();
+ assertNull(atomFeed);
+ assertNotNull(rssFeed);
+ assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
+ assertEquals(rssFeed.resource, "http://bamph.com/index.xml");
}
public void testBlosxom() throws Exception {
@@ -831,6 +1026,32 @@
/* test through the FeedLocator */
list = FeedLocator.locate(resource);
atomFeed = list.getAdAtomFeed();
+ public static void main( String[] args ) throws Exception {
+
+ TestProbeLocator test = new TestProbeLocator( null );
+
+ /*test.test( "http://xanga.com/home.aspx?user=joe",
+ BlogService.XANGA,
+ 1 );
+
+ test.test( "http://www.xanga.com/home.aspx?user=joe",
+ BlogService.XANGA,
+ 1 );*/
+
+ test.testBlogger();
+ test.testLiveJournal();
+ test.testDiaryLand();
+ test.testMovableType();
+ test.testXanga();
+ test.testWordPress();
+ test.testAOLJournal();
+ test.testTypePad();
+ test.testGreyMatter();
+ test.testPMachine();
+ test.testBlosxom();
+ test.testRadioUserland();
+ test.testTextPattern();
+ }
rssFeed = list.getAdRSSFeed();
assertNull(atomFeed);
assertNotNull(rssFeed);
@@ -855,6 +1076,32 @@
assertEquals(feeds[0].resource, resource + "/index.rss");
/* test through the FeedLocator */
list = FeedLocator.locate(resource);
+ public static void main( String[] args ) throws Exception {
+
+ TestProbeLocator test = new TestProbeLocator( null );
+
+ /*test.test( "http://xanga.com/home.aspx?user=joe",
+ BlogService.XANGA,
+ 1 );
+
+ test.test( "http://www.xanga.com/home.aspx?user=joe",
+ BlogService.XANGA,
+ 1 );*/
+
+ test.testBlogger();
+ test.testLiveJournal();
+ test.testDiaryLand();
+ test.testMovableType();
+ test.testXanga();
+ test.testWordPress();
+ test.testAOLJournal();
+ test.testTypePad();
+ test.testGreyMatter();
+ test.testPMachine();
+ test.testBlosxom();
+ test.testRadioUserland();
+ test.testTextPattern();
+ }
atomFeed = list.getAdAtomFeed();
rssFeed = list.getAdRSSFeed();
assertNull(atomFeed);
@@ -879,6 +1126,32 @@
assertEquals(blogService, BlogService.RADIO_USERLAND);
list = new FeedList();
ProbeLocator.locate(resource, content, list);
+ public static void main( String[] args ) throws Exception {
+
+ TestProbeLocator test = new TestProbeLocator( null );
+
+ /*test.test( "http://xanga.com/home.aspx?user=joe",
+ BlogService.XANGA,
+ 1 );
+
+ test.test( "http://www.xanga.com/home.aspx?user=joe",
+ BlogService.XANGA,
+ 1 );*/
+
+ test.testBlogger();
+ test.testLiveJournal();
+ test.testDiaryLand();
+ test.testMovableType();
+ test.testXanga();
+ test.testWordPress();
+ test.testAOLJournal();
+ test.testTypePad();
+ test.testGreyMatter();
+ test.testPMachine();
+ test.testBlosxom();
+ test.testRadioUserland();
+ test.testTextPattern();
+ }
assertEquals(list.size(), 1);
feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
assertEquals(feeds.length, 1);
@@ -903,6 +1176,32 @@
assertEquals(blogService, BlogService.RADIO_USERLAND);
list = new FeedList();
ProbeLocator.locate(resource, content, list);
+ public static void main( String[] args ) throws Exception {
+
+ TestProbeLocator test = new TestProbeLocator( null );
+
+ /*test.test( "http://xanga.com/home.aspx?user=joe",
+ BlogService.XANGA,
+ 1 );
+
+ test.test( "http://www.xanga.com/home.aspx?user=joe",
+ BlogService.XANGA,
+ 1 );*/
+
+ test.testBlogger();
+ test.testLiveJournal();
+ test.testDiaryLand();
+ test.testMovableType();
+ test.testXanga();
+ test.testWordPress();
+ test.testAOLJournal();
+ test.testTypePad();
+ test.testGreyMatter();
+ test.testPMachine();
+ test.testBlosxom();
+ test.testRadioUserland();
+ test.testTextPattern();
+ }
assertEquals(list.size(), 1);
feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
assertEquals(feeds.length, 1);
@@ -1002,5 +1301,31 @@
return results.toString();
}
+ public static void main( String[] args ) throws Exception {
+
+ TestProbeLocator test = new TestProbeLocator( null );
+
+ /*test.test( "http://xanga.com/home.aspx?user=joe",
+ BlogService.XANGA,
+ 1 );
+
+ test.test( "http://www.xanga.com/home.aspx?user=joe",
+ BlogService.XANGA,
+ 1 );*/
+
+ test.testBlogger();
+ test.testLiveJournal();
+ test.testDiaryLand();
+ test.testMovableType();
+ test.testXanga();
+ test.testWordPress();
+ test.testAOLJournal();
+ test.testTypePad();
+ test.testGreyMatter();
+ test.testPMachine();
+ test.testBlosxom();
+ test.testRadioUserland();
+ test.testTextPattern();
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org