You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@labs.apache.org by ry...@apache.org on 2008/10/02 22:52:34 UTC

svn commit: r701239 - in /labs/droids/branch/LABS-144/src: core/java/org/apache/droids/ core/java/org/apache/droids/api/ plugins/java/org/apache/droids/parse/html/ robots/java/org/apache/droids/crawler/

Author: ryan
Date: Thu Oct  2 13:52:34 2008
New Revision: 701239

URL: http://svn.apache.org/viewvc?rev=701239&view=rev
Log:
making Link and Outlink the same class... no need to overcomplicate things :)

Removed:
    labs/droids/branch/LABS-144/src/core/java/org/apache/droids/OutlinkTask.java
    labs/droids/branch/LABS-144/src/core/java/org/apache/droids/api/Outlink.java
Modified:
    labs/droids/branch/LABS-144/src/core/java/org/apache/droids/LinkTask.java
    labs/droids/branch/LABS-144/src/core/java/org/apache/droids/ParseData.java
    labs/droids/branch/LABS-144/src/core/java/org/apache/droids/api/Link.java
    labs/droids/branch/LABS-144/src/plugins/java/org/apache/droids/parse/html/HtmlParser.java
    labs/droids/branch/LABS-144/src/robots/java/org/apache/droids/crawler/CrawlingWorker.java

Modified: labs/droids/branch/LABS-144/src/core/java/org/apache/droids/LinkTask.java
URL: http://svn.apache.org/viewvc/labs/droids/branch/LABS-144/src/core/java/org/apache/droids/LinkTask.java?rev=701239&r1=701238&r2=701239&view=diff
==============================================================================
--- labs/droids/branch/LABS-144/src/core/java/org/apache/droids/LinkTask.java (original)
+++ labs/droids/branch/LABS-144/src/core/java/org/apache/droids/LinkTask.java Thu Oct  2 13:52:34 2008
@@ -30,6 +30,7 @@
   
   private Date lastModifedDate;
   private String[] linksTo;
+  private String anchorText;
   
   public LinkTask( Link from, URI uri, int depth )
   {
@@ -78,4 +79,12 @@
   public URI getURI() {
     return uri;
   }
+
+  public String getAnchorText() {
+    return anchorText;
+  }
+
+  public void setAnchorText(String anchorText) {
+    this.anchorText = anchorText;
+  }
 }
\ No newline at end of file

Modified: labs/droids/branch/LABS-144/src/core/java/org/apache/droids/ParseData.java
URL: http://svn.apache.org/viewvc/labs/droids/branch/LABS-144/src/core/java/org/apache/droids/ParseData.java?rev=701239&r1=701238&r2=701239&view=diff
==============================================================================
--- labs/droids/branch/LABS-144/src/core/java/org/apache/droids/ParseData.java (original)
+++ labs/droids/branch/LABS-144/src/core/java/org/apache/droids/ParseData.java Thu Oct  2 13:52:34 2008
@@ -16,7 +16,7 @@
  */
 package org.apache.droids;
 
-import org.apache.droids.api.Outlink;
+import org.apache.droids.api.Link;
 
 /**
  * The result object that are filled by a parser
@@ -25,7 +25,7 @@
  * 
  */
 public class ParseData {
-  private Outlink[] outlinks;
+  private Link[] outlinks;
 
   /**
    * Create a new instance of Parse data for the given outlinks
@@ -33,7 +33,7 @@
    * @param outlinks
    *                the array of outgoing links
    */
-  public ParseData(Outlink[] outlinks) {
+  public ParseData(Link[] outlinks) {
     this.outlinks = outlinks.clone();
   }
 
@@ -42,7 +42,7 @@
    * 
    * @return all outlinks 
    */
-  public Outlink[] getOutlinks() {
+  public Link[] getOutlinks() {
     return outlinks.clone();
   }
 }

Modified: labs/droids/branch/LABS-144/src/core/java/org/apache/droids/api/Link.java
URL: http://svn.apache.org/viewvc/labs/droids/branch/LABS-144/src/core/java/org/apache/droids/api/Link.java?rev=701239&r1=701238&r2=701239&view=diff
==============================================================================
--- labs/droids/branch/LABS-144/src/core/java/org/apache/droids/api/Link.java (original)
+++ labs/droids/branch/LABS-144/src/core/java/org/apache/droids/api/Link.java Thu Oct  2 13:52:34 2008
@@ -32,6 +32,11 @@
    * @return the URI to this link
    */
   URI getURI();
+
+  /**
+   * @return the Anchor text for this link
+   */
+  String getAnchorText();
   
   /**
    * From where the link was created

Modified: labs/droids/branch/LABS-144/src/plugins/java/org/apache/droids/parse/html/HtmlParser.java
URL: http://svn.apache.org/viewvc/labs/droids/branch/LABS-144/src/plugins/java/org/apache/droids/parse/html/HtmlParser.java?rev=701239&r1=701238&r2=701239&view=diff
==============================================================================
--- labs/droids/branch/LABS-144/src/plugins/java/org/apache/droids/parse/html/HtmlParser.java (original)
+++ labs/droids/branch/LABS-144/src/plugins/java/org/apache/droids/parse/html/HtmlParser.java Thu Oct  2 13:52:34 2008
@@ -29,8 +29,7 @@
 import org.apache.droids.api.Parse;
 import org.apache.droids.api.Parser;
 import org.apache.droids.helper.Loggable;
-import org.apache.droids.api.Outlink;
-import org.apache.droids.OutlinkTask;
+import org.apache.droids.LinkTask;
 import org.apache.droids.ParseData;
 import org.apache.droids.parse.ParseImpl;
 import org.apache.html.dom.HTMLDocumentImpl;
@@ -93,14 +92,13 @@
   }
 
   private ParseData extract(DocumentFragment node) {
-    final ArrayList<Outlink> links = new ArrayList<Outlink>();
+    final ArrayList<Link> links = new ArrayList<Link>();
     try {
       extractLinks(node, links, new HashSet<String>());
     } catch (MalformedURLException e) {
       log.fatal(e);
     }
-    Outlink[] outlinks = new Outlink[0];
-    outlinks = links.toArray(new Outlink[links.size()]);
+    Link[] outlinks = links.toArray(new Link[links.size()]);
     return new ParseData(outlinks);
   }
 
@@ -138,7 +136,7 @@
     return remover;
   }
 
-  private void extractLinks(Node node, ArrayList<Outlink> links,
+  private void extractLinks(Node node, ArrayList<Link> links,
       HashSet<String> set) throws MalformedURLException {
     if (node.getNodeType() == Node.ELEMENT_NODE) {
       String nodeName = node.getNodeName().toLowerCase();
@@ -166,7 +164,7 @@
                 // Link from, URI uri, int depth, String text
                 String url = target.contains(":/") ? target : newUrl;
                 URI uri = new URI( url );
-                final OutlinkTask outlink = new OutlinkTask( link, uri, null );
+                final LinkTask outlink = new LinkTask( link, uri, link.getDepth()+1 );
                 log.debug("set size: "+set.size());
                 log.debug("outlink.getToUrl(): "+outlink.getURI());
                 log.debug("set.contains(outlink.getToUrl(): "+set.contains(url));

Modified: labs/droids/branch/LABS-144/src/robots/java/org/apache/droids/crawler/CrawlingWorker.java
URL: http://svn.apache.org/viewvc/labs/droids/branch/LABS-144/src/robots/java/org/apache/droids/crawler/CrawlingWorker.java?rev=701239&r1=701238&r2=701239&view=diff
==============================================================================
--- labs/droids/branch/LABS-144/src/robots/java/org/apache/droids/crawler/CrawlingWorker.java (original)
+++ labs/droids/branch/LABS-144/src/robots/java/org/apache/droids/crawler/CrawlingWorker.java Thu Oct  2 13:52:34 2008
@@ -60,7 +60,7 @@
           else {
             Parse parse = parser.getParse(openStream, link);
             if( parse.getData() != null ) {
-              Collection<Outlink> outlinks = getFilteredOutlinks( parse );
+              Collection<Link> outlinks = getFilteredOutlinks( parse );
               droid.getQueue().merge( outlinks );
             }
             handle( parse, openStream, link );
@@ -97,14 +97,14 @@
     }
   }
   
-  protected Collection<Outlink> getFilteredOutlinks( Parse parse )
+  protected Collection<Link> getFilteredOutlinks( Parse parse )
   {
-    Outlink[] links = parse.getData().getOutlinks();
+    Link[] links = parse.getData().getOutlinks();
     // new cleaned list
     URLFiltersFactory filters = droid.getFiltersFactory();
     // TODO -- make the hashvalue for Outlink...
-    Map<String,Outlink> filtered = new HashMap<String,Outlink>();
-    for( Outlink outlink : links ) {
+    Map<String,Link> filtered = new HashMap<String,Link>();
+    for( Link outlink : links ) {
       String id = outlink.getId();
       if (filters.accept(outlink.getId()) && !filtered.containsKey(id)) {
         filtered.put(id,outlink);



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org