You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by bu...@apache.org on 2004/04/15 18:58:15 UTC
cvs commit: jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate LinkLocator.java
burton 2004/04/15 09:58:15
Added: feedparser/src/java/org/apache/commons/feedparser/locate
LinkLocator.java
Log:
more feedlocator support...
Revision Changes Path
1.1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/LinkLocator.java
Index: LinkLocator.java
===================================================================
/*
* Copyright 1999,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.feedparser.locate;
import java.io.*;
import java.util.*;
import java.net.*;
/**
 * Scans the anchors of a page for links that look like feeds hosted on the
 * same site as the page itself.
 *
 * @author <a href="mailto:burton@apache.org">Kevin A. Burton</a>
 */
public class LinkLocator {

    /**
     * Collects feed-looking links found in <code>content</code>.
     *
     * <p>An anchor is accepted when its expanded URL is on the same site as
     * <code>resource</code> (see {@link #getSite}), does not start with
     * <code>http://127</code>, has not been seen before during this call, and
     * ends with <code>.xml</code>, <code>.rss</code>, <code>.rdf</code> or
     * <code>/node/feed</code>.</p>
     *
     * @param resource URL of the page the content was fetched from; passed to
     *        <code>ResourceExpander.expand</code> together with each href
     *        (presumably to resolve relative links -- confirm against
     *        ResourceExpander).
     * @param content the markup handed to <code>AnchorParser.parseAnchors</code>.
     * @param list list that accepted links (as Strings) are appended to; when
     *        <code>null</code> a new list is created.
     * @return the list the links were added to: <code>list</code> itself when
     *         it was non-null, otherwise the newly created list. Never
     *         <code>null</code>.
     * @throws Exception whatever the anchor parser or the listener throws.
     */
    public static final List locate( String resource, String content, List list )
        throws Exception {

        // Previously the matches were gathered in a private list inside the
        // listener and this method returned null, so callers never saw them.
        final List result = ( list != null ) ? list : new ArrayList();

        AnchorParserListener listener = new AnchorParserListener() {

            String resource = null;

            String site = null;

            HashSet seen = new HashSet();

            public void setContext( Object context ) {

                resource = (String)context;

                //remember the site of the page so anchors can be compared to it
                site = getSite( resource );

            }

            public Object getResult() {
                return result;
            }

            // Always returns true (presumably "keep parsing" -- confirm
            // against AnchorParserListener).
            public boolean onAnchor( String href, String rel, String title ) {

                String current = ResourceExpander.expand( resource, href );

                if ( current == null ) {
                    return true; //nothing to inspect
                }

                //skip links whose site is unknown (malformed URL) or differs
                //from the site of the page; the null check avoids an NPE.
                String currentSite = getSite( current );

                if ( currentSite == null || ! currentSite.equals( site ) ) {
                    return true;
                }

                //Radio style feed served from the local machine; not usable.
                if ( current.startsWith( "http://127" ) ) {
                    return true;
                }

                //only consider each distinct URL once (Set.add returns false
                //when the element was already present)
                if ( ! seen.add( current ) ) {
                    return true;
                }

                if ( isFeedLink( current ) ) {
                    result.add( current );
                }

                return true;

            }

        };

        listener.setContext( resource );
        AnchorParser.parseAnchors( content, listener );

        return result;

    }

    /**
     * Returns the host of <code>resource</code> with a single leading
     * <code>www.</code> removed, so that <code>www.example.com</code> and
     * <code>example.com</code> compare as the same site.
     *
     * <p>The earlier implementation tried to do this by replacing
     * <code>http://www</code> in the host name, which can never match because
     * a host name does not contain the scheme.</p>
     *
     * @param resource an absolute URL.
     * @return the normalized host, or <code>null</code> when
     *         <code>resource</code> is not a well-formed URL.
     */
    public static String getSite( String resource ) {

        try {

            String site = new URL( resource ).getHost();

            if ( site.startsWith( "www." ) ) {
                site = site.substring( "www.".length() );
            }

            return site;

        } catch ( MalformedURLException e ) {
            //not a URL we understand; callers treat null as "unknown site"
            return null;
        }

    }

    /**
     * Tests whether a URL ends with one of the suffixes treated as a feed.
     */
    private static boolean isFeedLink( String url ) {

        return url.endsWith( ".xml" ) ||
               url.endsWith( ".rss" ) ||
               url.endsWith( ".rdf" ) ||
               //for coderman's blog at http://www.peertech.org
               url.endsWith( "/node/feed" );

    }

}
---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org