You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by je...@apache.org on 2006/02/16 11:41:52 UTC
svn commit: r378219 - in /lucene/nutch/trunk/src/plugin: clustering-carrot2/
clustering-carrot2/lib/ lib-nekohtml/ lib-nekohtml/lib/ parse-html/
parse-html/lib/
Author: jerome
Date: Thu Feb 16 02:41:49 2006
New Revision: 378219
URL: http://svn.apache.org/viewcvs?rev=378219&view=rev
Log:
NUTCH-196 : Add neko html library (lib-nekohtml)
Added:
lucene/nutch/trunk/src/plugin/lib-nekohtml/
lucene/nutch/trunk/src/plugin/lib-nekohtml/build.xml (with props)
lucene/nutch/trunk/src/plugin/lib-nekohtml/lib/
lucene/nutch/trunk/src/plugin/lib-nekohtml/lib/nekohtml-0.9.4.jar (with props)
lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml (with props)
Removed:
lucene/nutch/trunk/src/plugin/clustering-carrot2/lib/nekohtml-0.9.2.jar
lucene/nutch/trunk/src/plugin/parse-html/lib/nekohtml-0.9.4.LICENSE.txt
lucene/nutch/trunk/src/plugin/parse-html/lib/nekohtml-0.9.4.jar
Modified:
lucene/nutch/trunk/src/plugin/clustering-carrot2/build.xml
lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml
lucene/nutch/trunk/src/plugin/parse-html/build.xml
lucene/nutch/trunk/src/plugin/parse-html/plugin.xml
Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/build.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/clustering-carrot2/build.xml?rev=378219&r1=378218&r2=378219&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/build.xml (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/build.xml Thu Feb 16 02:41:49 2006
@@ -7,7 +7,7 @@
<path id="plugin.deps">
<fileset dir="${nutch.root}/build">
<include name="**/lib-log4j/*.jar" />
- <include name="**/lib-commons-httpclient/*.jar" />
+ <include name="**/lib-nekohtml/*.jar" />
</fileset>
</path>
Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml?rev=378219&r1=378218&r2=378219&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml Thu Feb 16 02:41:49 2006
@@ -22,15 +22,12 @@
<library name="commons-pool-1.1.jar"/>
<library name="FSA.jar"/>
<library name="Jama-1.0.1-patched.jar"/>
- <library name="log4j-1.2.8.jar"/>
-
- <library name="nekohtml-0.9.2.jar"/>
</runtime>
<requires>
<import plugin="nutch-extensionpoints"/>
<import plugin="lib-log4j"/>
- <import plugin="lib-commons-httpclient"/>
+ <import plugin="lib-nekohtml"/>
</requires>
<extension id="org.apache.nutch.clustering.carrot2"
Added: lucene/nutch/trunk/src/plugin/lib-nekohtml/build.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-nekohtml/build.xml?rev=378219&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-nekohtml/build.xml (added)
+++ lucene/nutch/trunk/src/plugin/lib-nekohtml/build.xml Thu Feb 16 02:41:49 2006
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+
+<project name="lib-nekohtml" default="jar">
+
+ <import file="../build-plugin.xml"/>
+
+ <!--
+ ! Override the compile and jar targets,
+ ! since there is nothing to compile here.
+ ! -->
+ <target name="compile" depends="init">
+ <echo message="Compiling plugin: ${name}"/>
+ </target>
+
+ <target name="jar" depends="compile">
+ <copy todir="${build.dir}" verbose="true">
+ <fileset dir="./lib" includes="**/*.jar"/>
+ </copy>
+ </target>
+
+</project>
Propchange: lucene/nutch/trunk/src/plugin/lib-nekohtml/build.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/nutch/trunk/src/plugin/lib-nekohtml/lib/nekohtml-0.9.4.jar
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-nekohtml/lib/nekohtml-0.9.4.jar?rev=378219&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/nutch/trunk/src/plugin/lib-nekohtml/lib/nekohtml-0.9.4.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml?rev=378219&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml (added)
+++ lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml Thu Feb 16 02:41:49 2006
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ! NekoHTML is a simple HTML scanner and tag balancer.
+ ! (http://people.apache.org/~andyc/neko/doc/html/index.html)
+ !
+ ! Download : http://people.apache.org/~andyc/neko/doc/html/index.html
+ ! License : http://people.apache.org/~andyc/neko/LICENSE
+ !-->
+<plugin
+ id="lib-nekohtml"
+ name="CyberNeko HTML Parser"
+ version="0.9.4"
+ provider-name="org.cyberneko">
+
+ <runtime>
+ <library name="nekohtml-0.9.4.jar">
+ <export name="*"/>
+ </library>
+ </runtime>
+
+</plugin>
Propchange: lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/nutch/trunk/src/plugin/parse-html/build.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/build.xml?rev=378219&r1=378218&r2=378219&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/build.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/build.xml Thu Feb 16 02:41:49 2006
@@ -4,4 +4,10 @@
<import file="../build-plugin.xml"/>
+ <path id="plugin.deps">
+ <fileset dir="${nutch.root}/build">
+ <include name="**/lib-nekohtml/*.jar" />
+ </fileset>
+ </path>
+
</project>
Modified: lucene/nutch/trunk/src/plugin/parse-html/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/plugin.xml?rev=378219&r1=378218&r2=378219&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/plugin.xml Thu Feb 16 02:41:49 2006
@@ -9,12 +9,12 @@
<library name="parse-html.jar">
<export name="*"/>
</library>
- <library name="nekohtml-0.9.4.jar"/>
<library name="tagsoup-1.0rc3.jar"/>
</runtime>
<requires>
<import plugin="nutch-extensionpoints"/>
+ <import plugin="lib-nekohtml"/>
</requires>
<extension id="org.apache.nutch.parse.html"