You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by je...@apache.org on 2006/02/16 11:41:52 UTC

svn commit: r378219 - in /lucene/nutch/trunk/src/plugin: clustering-carrot2/ clustering-carrot2/lib/ lib-nekohtml/ lib-nekohtml/lib/ parse-html/ parse-html/lib/

Author: jerome
Date: Thu Feb 16 02:41:49 2006
New Revision: 378219

URL: http://svn.apache.org/viewcvs?rev=378219&view=rev
Log:
NUTCH-196 : Add neko html library (lib-nekohtml)

Added:
    lucene/nutch/trunk/src/plugin/lib-nekohtml/
    lucene/nutch/trunk/src/plugin/lib-nekohtml/build.xml   (with props)
    lucene/nutch/trunk/src/plugin/lib-nekohtml/lib/
    lucene/nutch/trunk/src/plugin/lib-nekohtml/lib/nekohtml-0.9.4.jar   (with props)
    lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml   (with props)
Removed:
    lucene/nutch/trunk/src/plugin/clustering-carrot2/lib/nekohtml-0.9.2.jar
    lucene/nutch/trunk/src/plugin/parse-html/lib/nekohtml-0.9.4.LICENSE.txt
    lucene/nutch/trunk/src/plugin/parse-html/lib/nekohtml-0.9.4.jar
Modified:
    lucene/nutch/trunk/src/plugin/clustering-carrot2/build.xml
    lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml
    lucene/nutch/trunk/src/plugin/parse-html/build.xml
    lucene/nutch/trunk/src/plugin/parse-html/plugin.xml

Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/build.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/clustering-carrot2/build.xml?rev=378219&r1=378218&r2=378219&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/build.xml (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/build.xml Thu Feb 16 02:41:49 2006
@@ -7,7 +7,7 @@
   <path id="plugin.deps">
     <fileset dir="${nutch.root}/build">
       <include name="**/lib-log4j/*.jar" />
-      <include name="**/lib-commons-httpclient/*.jar" />
+      <include name="**/lib-nekohtml/*.jar" />
     </fileset>
   </path>
 

Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml?rev=378219&r1=378218&r2=378219&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml Thu Feb 16 02:41:49 2006
@@ -22,15 +22,12 @@
       <library name="commons-pool-1.1.jar"/>
       <library name="FSA.jar"/>
       <library name="Jama-1.0.1-patched.jar"/>
-      <library name="log4j-1.2.8.jar"/>
-
-      <library name="nekohtml-0.9.2.jar"/>
    </runtime>
 
    <requires>
       <import plugin="nutch-extensionpoints"/>
       <import plugin="lib-log4j"/>
-      <import plugin="lib-commons-httpclient"/>
+      <import plugin="lib-nekohtml"/>
    </requires>
 
    <extension id="org.apache.nutch.clustering.carrot2"

Added: lucene/nutch/trunk/src/plugin/lib-nekohtml/build.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-nekohtml/build.xml?rev=378219&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-nekohtml/build.xml (added)
+++ lucene/nutch/trunk/src/plugin/lib-nekohtml/build.xml Thu Feb 16 02:41:49 2006
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+
+<project name="lib-nekohtml" default="jar">
+
+  <import file="../build-plugin.xml"/>
+
+  <!--
+   ! Override the compile and jar targets: there is nothing to compile here,
+   ! so "jar" only copies the bundled jars from ./lib into ${build.dir}.
+   ! -->
+  <target name="compile" depends="init">
+    <echo message="Compiling plugin: ${name}"/>
+  </target>
+
+  <target name="jar" depends="compile">
+    <copy todir="${build.dir}" verbose="true">
+      <fileset dir="./lib" includes="**/*.jar"/>
+    </copy>
+  </target>
+
+</project>

Propchange: lucene/nutch/trunk/src/plugin/lib-nekohtml/build.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/nutch/trunk/src/plugin/lib-nekohtml/lib/nekohtml-0.9.4.jar
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-nekohtml/lib/nekohtml-0.9.4.jar?rev=378219&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/lib-nekohtml/lib/nekohtml-0.9.4.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml?rev=378219&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml (added)
+++ lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml Thu Feb 16 02:41:49 2006
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ! NekoHTML is a simple HTML scanner and tag balancer
+ ! (http://people.apache.org/~andyc/neko/doc/html/index.html).
+ ! This plugin only bundles nekohtml-0.9.4.jar, exported with name="*".
+ ! Download : http://people.apache.org/~andyc/neko/doc/html/index.html
+ ! License : http://people.apache.org/~andyc/neko/LICENSE
+ !-->
+<plugin
+   id="lib-nekohtml"
+   name="CyberNeko HTML Parser"
+   version="0.9.4"
+   provider-name="org.cyberneko">
+
+   <runtime>
+     <library name="nekohtml-0.9.4.jar">
+        <export name="*"/>
+     </library>
+   </runtime>
+
+</plugin>

Propchange: lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/nutch/trunk/src/plugin/parse-html/build.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/build.xml?rev=378219&r1=378218&r2=378219&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/build.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/build.xml Thu Feb 16 02:41:49 2006
@@ -4,4 +4,10 @@
 
   <import file="../build-plugin.xml"/>
 
+  <path id="plugin.deps">
+    <fileset dir="${nutch.root}/build">
+      <include name="**/lib-nekohtml/*.jar" />
+    </fileset>
+  </path>
+
 </project>

Modified: lucene/nutch/trunk/src/plugin/parse-html/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/plugin.xml?rev=378219&r1=378218&r2=378219&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/plugin.xml Thu Feb 16 02:41:49 2006
@@ -9,12 +9,12 @@
       <library name="parse-html.jar">
          <export name="*"/>
       </library>
-      <library name="nekohtml-0.9.4.jar"/>
       <library name="tagsoup-1.0rc3.jar"/>
    </runtime>
 
    <requires>
       <import plugin="nutch-extensionpoints"/>
+      <import plugin="lib-nekohtml"/>
    </requires>
 
    <extension id="org.apache.nutch.parse.html"