You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by si...@apache.org on 2012/07/17 23:48:30 UTC

svn commit: r1362675 [13/26] - in /incubator/any23/site: ./ apache-any23-core/ apache-any23-core/css/ apache-any23-core/images/ apache-any23-core/images/logos/ apache-any23-core/images/profiles/ apache-any23-core/js/ apache-any23-service/ apache-any23-...

Added: incubator/any23/site/images/any23-overall.png
URL: http://svn.apache.org/viewvc/incubator/any23/site/images/any23-overall.png?rev=1362675&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/any23/site/images/any23-overall.png
------------------------------------------------------------------------------
    svn:mime-type = image/png

Added: incubator/any23/site/images/apache-tika-90x30.png
URL: http://svn.apache.org/viewvc/incubator/any23/site/images/apache-tika-90x30.png?rev=1362675&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/any23/site/images/apache-tika-90x30.png
------------------------------------------------------------------------------
    svn:mime-type = image/png

Added: incubator/any23/site/images/fu-logo-90x25.png
URL: http://svn.apache.org/viewvc/incubator/any23/site/images/fu-logo-90x25.png?rev=1362675&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/any23/site/images/fu-logo-90x25.png
------------------------------------------------------------------------------
    svn:mime-type = image/png

Added: incubator/any23/site/images/kit-logo-90x40.png
URL: http://svn.apache.org/viewvc/incubator/any23/site/images/kit-logo-90x40.png?rev=1362675&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/any23/site/images/kit-logo-90x40.png
------------------------------------------------------------------------------
    svn:mime-type = image/png

Added: incubator/any23/site/images/logo-lod2-90x30.png
URL: http://svn.apache.org/viewvc/incubator/any23/site/images/logo-lod2-90x30.png?rev=1362675&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/any23/site/images/logo-lod2-90x30.png
------------------------------------------------------------------------------
    svn:mime-type = image/png

Added: incubator/any23/site/images/profiles/pre-release.png
URL: http://svn.apache.org/viewvc/incubator/any23/site/images/profiles/pre-release.png?rev=1362675&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/any23/site/images/profiles/pre-release.png
------------------------------------------------------------------------------
    svn:mime-type = image/png

Added: incubator/any23/site/images/profiles/retired.png
URL: http://svn.apache.org/viewvc/incubator/any23/site/images/profiles/retired.png?rev=1362675&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/any23/site/images/profiles/retired.png
------------------------------------------------------------------------------
    svn:mime-type = image/png

Added: incubator/any23/site/images/profiles/sandbox.png
URL: http://svn.apache.org/viewvc/incubator/any23/site/images/profiles/sandbox.png?rev=1362675&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/any23/site/images/profiles/sandbox.png
------------------------------------------------------------------------------
    svn:mime-type = image/png

Added: incubator/any23/site/plugin-basic-crawler.html
URL: http://svn.apache.org/viewvc/incubator/any23/site/plugin-basic-crawler.html?rev=1362675&view=auto
==============================================================================
--- incubator/any23/site/plugin-basic-crawler.html (added)
+++ incubator/any23/site/plugin-basic-crawler.html Tue Jul 17 21:48:21 2012
@@ -0,0 +1,211 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<!--
+ | Generated by Apache Maven Doxia at Jul 17, 2012
+ | Rendered using Apache Maven Fluido Skin 1.2.1
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Apache Any23 - Plugins - Basic Crawler</title>
+    <link rel="stylesheet" href="./css/apache-maven-fluido.min.css" />
+    <link rel="stylesheet" href="./css/site.css" />
+    <link rel="stylesheet" href="./css/print.css" media="print" />
+
+      
+    <script type="text/javascript" src="./js/apache-maven-fluido.min.js"></script>
+
+    
+      <meta name="author" content="The Apache Software Foundation" />
+    <meta name="Date-Revision-yyyymmdd" content="20120717" />
+    <meta http-equiv="Content-Language" content="en" />
+    
+        </head>
+        <body class="topBarDisabled">
+          
+        
+    
+        <div class="container-fluid">
+          <div id="banner">
+        <div class="pull-left">
+                                                  <a href="index.html" id="bannerLeft">
+                                                                                                <img src="images/logo-any23.png"  alt="Apache Any23: Anything to Triples"/>
+                </a>
+                      </div>
+        <div class="pull-right">                                <a href="../" id="bannerRight">
+                                                                                                <img src="../images/apache-incubator-logo.png"  alt="Apache Incubator"/>
+                </a>
+      </div>
+        <div class="clear"><hr/></div>
+      </div>
+
+      <div id="breadcrumbs">
+        <ul class="breadcrumb">
+                
+                    
+                  <li id="publishDate">Last Published: 2012-07-17</li>
+                  <li class="divider">|</li> <li id="projectVersion">Version: 0.7.0-incubating</li>
+                      
+                
+                    
+      
+                  </ul>
+      </div>
+
+            <div class="row-fluid">
+        <div id="leftColumn" class="span3">
+          <div class="well sidebar-nav">
+                
+                    
+                                    <h3>Apache Any23</h3>
+                  <ul>
+                  <li class="none">
+                          <a href="index.html" title="Introduction">Introduction</a>
+            </li>
+                                                                                                                                                              <li class="collapsed">
+                          <a href="download.html" title="Downloads">Downloads</a>
+                  </li>
+                  <li class="none">
+                          <a href="install.html" title="Install">Install</a>
+            </li>
+          </ul>
+                        <h3>Documentation</h3>
+                  <ul>
+                  <li class="none">
+                          <a href="getting-started.html" title="Getting Started">Getting Started</a>
+            </li>
+                  <li class="none">
+                          <a href="supported-formats.html" title="Supported Formats">Supported Formats</a>
+            </li>
+                  <li class="none">
+                          <a href="extractors.html" title="Extractors">Extractors</a>
+            </li>
+                  <li class="none">
+                          <a href="configuration.html" title="Configuration">Configuration</a>
+            </li>
+                  <li class="none">
+                          <a href="service.html" title="REST Service">REST Service</a>
+            </li>
+                  <li class="none">
+                          <a href="any23-plugins.html" title="Any23 Plugins">Any23 Plugins</a>
+            </li>
+                  <li class="none">
+                          <a href="apidocs/index.html" title="APIs Doc">APIs Doc</a>
+            </li>
+                                                                                                                                                                                                                    <li class="collapsed">
+                          <a href="developers.html" title="Developers Guide">Developers Guide</a>
+                  </li>
+          </ul>
+                        <h3>Project Documentation</h3>
+                  <ul>
+                                                                                                                                                                                                                                                                                                                                <li class="collapsed">
+                          <a href="project-info.html" title="Project Information">Project Information</a>
+                  </li>
+                                                                                                                                            <li class="collapsed">
+                          <a href="project-reports.html" title="Project Reports">Project Reports</a>
+                  </li>
+          </ul>
+                        <h3>Misc</h3>
+                  <ul>
+                  <li class="none">
+                          <a href="acknowledgements.html" title="Acknowledgements">Acknowledgements</a>
+            </li>
+                  <li class="none">
+                          <a href="poweredby.html" title="PoweredBy">PoweredBy</a>
+            </li>
+          </ul>
+                        <h3>ASF</h3>
+                  <ul>
+                  <li class="none">
+                          <a href="http://www.apache.org/foundation/how-it-works.html" class="externalLink" title="How Apache Works">How Apache Works</a>
+            </li>
+                  <li class="none">
+                          <a href="http://www.apache.org/foundation/" class="externalLink" title="Foundation">Foundation</a>
+            </li>
+                  <li class="none">
+                          <a href="http://www.apache.org/foundation/sponsorship.html" class="externalLink" title="Sponsoring Apache">Sponsoring Apache</a>
+            </li>
+                  <li class="none">
+                          <a href="http://www.apache.org/foundation/thanks.html" class="externalLink" title="Thanks">Thanks</a>
+            </li>
+                  <li class="none">
+                          <a href="../" title="Apache Incubator">Apache Incubator</a>
+            </li>
+          </ul>
+                      
+                    
+                
+          <hr class="divider" />
+
+           <div id="poweredBy">
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                            <div class="clear"></div>
+                                                                                                                         <a href="http://tika.apache.org/" title="Apache Tika" class="poweredBy">
+        <img class="poweredBy"  alt="Apache Tika" src="images/apache-tika-90x30.png"    />
+      </a>
+                      </div>
+          </div>
+        </div>
+        
+        <div id="bodyColumn"  class="span9" >
+                                  
+            <!-- Licensed to the Apache Software Foundation (ASF) under one or more --><!-- contributor license agreements.  See the NOTICE file distributed with --><!-- this work for additional information regarding copyright ownership. --><!-- The ASF licenses this file to You under the Apache License, Version 2.0 --><!-- (the "License"); you may not use this file except in compliance with --><!-- the License.  You may obtain a copy of the License at --><!--  --><!-- http://www.apache.org/licenses/LICENSE-2.0 --><!--  --><!-- Unless required by applicable law or agreed to in writing, software --><!-- distributed under the License is distributed on an "AS IS" BASIS, --><!-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --><!-- See the License for the specific language governing permissions and --><!-- limitations under the License. --><div class="section"><h2>Basic Crawler Plugin<a name="Basic_Crawler_Plugin"></a></h2><p>The <i>Basic Crawler Plugin</i> implements a <i>CLI</i> <a href="./xref/org/apache/any23/cli/Tool.html">Tool</a> extending <a href="./xref/org/apache/any23/cli/Rover.html">Rover</a> to add <i>site crawling</i> capabilities.</p><p>The tool can be used to extract semantic content from small/medium-sized sites.</p><p>To use it make sure to have correctly configured the basic-crawler plugin to be found by the <i>any23tools</i> script (follow the <a href="./any23-plugins.html">Plugins</a> section instructions):</p><div class="source"><pre class="prettyprint">core/bin/$ ./any23tools Crawler
+usage: [{&lt;url&gt;|&lt;file&gt;}]+ [-d &lt;arg&gt;] [-e &lt;arg&gt;] [-f &lt;arg&gt;] [-h] [-l &lt;arg&gt;]
+       [-maxdepth &lt;arg&gt;] [-maxpages &lt;arg&gt;] [-n] [-numcrawlers &lt;arg&gt;] [-o
+       &lt;arg&gt;] [-p] [-pagefilter &lt;arg&gt;] [-politenessdelay &lt;arg&gt;] [-s]
+       [-storagefolder &lt;arg&gt;] [-t] [-v]
+ -d,--defaultns &lt;arg&gt;       Override the default namespace used to produce
+                            statements.
+ -e &lt;arg&gt;                   Specify a comma-separated list of extractors,
+                            e.g. rdf-xml,rdf-turtle.
+ -f,--Output format &lt;arg&gt;   [turtle (default), rdfxml, ntriples, nquads,
+                            trix, json, uri]
+ -h,--help                  Print this help.
+ -l,--log &lt;arg&gt;             Produce log within a file.
+ -maxdepth &lt;arg&gt;            Max allowed crawler depth. Default: no limit.
+ -maxpages &lt;arg&gt;            Max number of pages before interrupting crawl.
+                            Default: no limit.
+ -n,--nesting               Disable production of nesting triples.
+ -numcrawlers &lt;arg&gt;         Sets the number of crawlers. Default: 10
+ -o,--output &lt;arg&gt;          Specify Output file (defaults to standard
+                            output).
+ -p,--pedantic              Validate and fixes HTML content detecting
+                            commons issues.
+ -pagefilter &lt;arg&gt;          Regex used to filter out page URLs during
+                            crawling. Default:
+                            '.*(\.(css|js|bmp|gif|jpe?g|png|tiff?|mid|mp2|
+                            mp3|mp4|wav|wma|avi|mov|mpeg|ram|m4v|wmv|rm|sm
+                            il|pdf|swf|zip|rar|gz|xml|txt))$'
+ -politenessdelay &lt;arg&gt;     Politeness delay in milliseconds. Default: no
+                            limit.
+ -s,--stats                 Print out extraction statistics.
+ -storagefolder &lt;arg&gt;       Folder used to store crawler temporary data.
+                            Default:
+                            [/var/folders/d5/c_0b4h1d7t1gx6tzz_dn5cj40000g
+                            q/T/]
+ -t,--notrivial             Filter trivial statements (e.g. CSS related
+                            ones).
+ -v,--verbose               Show debug and progress information.</pre></div></div>
+                  </div>
+            </div>
+      
+    <hr/>
+
+    <footer>
+            <div class="container-fluid">
+              <div class="row span16">Copyright &copy;                    2010-2012
+                        <a href="http://www.apache.org/">The Apache Software Foundation</a>.
+            All Rights Reserved.      
+                    
+      </div>
+
+
+<div class="row span16">Apache Any23, Apache, the Apache feather logo, and the Apache Any23 project logos are trademarks of The Apache Software Foundation.
+      All other marks mentioned may be trademarks or registered trademarks of their respective owners.</div>
+                  
+        
+                </div>
+    </footer>
+  </body>
+</html>

Propchange: incubator/any23/site/plugin-basic-crawler.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/any23/site/plugin-basic-crawler.html
------------------------------------------------------------------------------
    svn:keywords = Date Revision Author HeadURL Id

Propchange: incubator/any23/site/plugin-basic-crawler.html
------------------------------------------------------------------------------
    svn:mime-type = text/html