Posted to commits@bigtop.apache.org by rv...@apache.org on 2012/04/20 17:54:14 UTC

svn commit: r1328428 - in /incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources: apt/package_data.xml package_data.xml

Author: rvs
Date: Fri Apr 20 15:54:13 2012
New Revision: 1328428

URL: http://svn.apache.org/viewvc?rev=1328428&view=rev
Log:
BIGTOP-545. package testing manifest in trunk needs to be updated

Modified:
    incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/package_data.xml
    incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/package_data.xml

Modified: incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/package_data.xml
URL: http://svn.apache.org/viewvc/incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/package_data.xml?rev=1328428&r1=1328427&r2=1328428&view=diff
==============================================================================
--- incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/package_data.xml (original)
+++ incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/package_data.xml Fri Apr 20 15:54:13 2012
@@ -20,7 +20,7 @@
   <users>
     <flume>
       <home>/var/run/flume</home>
-      <descr>Flume</descr>
+      <descr>Flume User</descr>
       <shell>/sbin/nologin</shell>
     </flume>
   </users>
@@ -32,7 +32,7 @@
   <users>
     <sqoop>
       <home>/var/lib/sqoop</home>
-      <descr>Sqoop</descr>
+      <descr>Sqoop User</descr>
       <shell>/sbin/nologin</shell>
     </sqoop>
   </users>
@@ -44,7 +44,7 @@
   <users>
     <zookeeper>
       <home>/var/lib/zookeeper</home>
-      <descr>ZooKeeper</descr>
+      <descr>ZooKeeper User</descr>
       <shell>/bin/false</shell>
     </zookeeper>
   </users>
@@ -75,7 +75,7 @@
   <users>
     <hive>
       <home>/var/lib/hive</home>
-      <descr>Hive</descr>
+      <descr>Hive User</descr>
       <shell>/sbin/nologin</shell>
     </hive>
   </users>
@@ -84,7 +84,7 @@
   <users>
     <hive>
       <home>/var/lib/hive</home>
-      <descr>Hive</descr>
+      <descr>Hive User</descr>
       <shell>/sbin/nologin</shell>
     </hive>
   </users>
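
Each <users> block above records the service account a package is expected to create: home directory, GECOS description (the <descr> field being updated here), and login shell. As a rough sketch of how such expectations can be checked on an installed system, the following standalone Java snippet compares one account against /etc/passwd; the class name and the hard-coded values are illustrative placeholders, not part of the Bigtop test harness.

    // Illustrative only: a minimal check that an installed package created the
    // expected service account. Field layout follows /etc/passwd:
    // name:passwd:uid:gid:gecos:home:shell
    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    public class UserManifestCheck {
        public static void main(String[] args) throws IOException {
            String user = "flume";                       // values taken from the manifest above
            String expectedHome = "/var/run/flume";
            String expectedDescr = "Flume User";
            String expectedShell = "/sbin/nologin";

            for (String line : Files.readAllLines(Paths.get("/etc/passwd"))) {
                String[] f = line.split(":");
                if (f.length >= 7 && f[0].equals(user)) {
                    boolean ok = f[4].equals(expectedDescr)
                              && f[5].equals(expectedHome)
                              && f[6].equals(expectedShell);
                    System.out.println(user + (ok ? ": matches manifest" : ": does not match manifest"));
                    return;
                }
            }
            System.out.println(user + ": not present");
        }
    }
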

Modified: incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/package_data.xml
URL: http://svn.apache.org/viewvc/incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/package_data.xml?rev=1328428&r1=1328427&r2=1328428&view=diff
==============================================================================
--- incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/package_data.xml (original)
+++ incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/package_data.xml Fri Apr 20 15:54:13 2012
@@ -91,7 +91,7 @@ Java Servlet and JavaServer Pages techno
 </whirr>
 <flume>
   <metadata>
-    <summary>reliable, scalable, and manageable distributed data collection application</summary>
+    <summary>Flume is a reliable, scalable, and manageable distributed data collection application for collecting data such as logs and delivering it to data stores such as Hadoop's HDFS.</summary>
     <description>Flume is a reliable, scalable, and manageable distributed data collection
  application for collecting data such as logs and delivering it to data stores
  such as Hadoop's HDFS.  It can efficiently collect, aggregate, and move large
@@ -123,7 +123,7 @@ Java Servlet and JavaServer Pages techno
 </flume>
 <flume-node>
   <metadata>
-    <summary>core element of Flume's data path that collects and delivers data</summary>
+    <summary>The Flume node daemon is a core element of Flume's data path and is responsible for generating, processing, and delivering data.</summary>
     <description>The Flume node daemon is a core element of flume's data path and is responsible for generating, processing, and delivering data.</description>
     <url>http://incubator.apache.org/projects/flume.html</url>
   </metadata>
@@ -140,7 +140,7 @@ Java Servlet and JavaServer Pages techno
 </flume-node>
 <sqoop>
   <metadata>
-    <summary>Tool for easy imports and exports of data sets between databases and HDFS</summary>
+    <summary>Sqoop allows easy imports and exports of data sets between databases and the Hadoop Distributed File System (HDFS).</summary>
     <description>Sqoop is a tool that provides the ability to import and export data sets between
  the Hadoop Distributed File System (HDFS) and relational databases.</description>
     <url>http://incubator.apache.org/sqoop/</url>
@@ -161,7 +161,7 @@ Java Servlet and JavaServer Pages techno
 <sqoop-metastore>
   <metadata>
     <summary>Shared metadata repository for Sqoop.</summary>
-    <description>This optional package hosts a metadata server for Sqoop clients across a network to use.</description>
+    <description>Shared metadata repository for Sqoop. This optional package hosts a metadata server for Sqoop clients across a network to use.</description>
     <url>http://incubator.apache.org/sqoop/</url>
   </metadata>
   <deps>
@@ -182,7 +182,7 @@ Java Servlet and JavaServer Pages techno
 </sqoop-metastore>
 <oozie>
   <metadata>
-    <summary>A workflow and coordinator sytem for Hadoop jobs.</summary>
+    <summary>Oozie is a system that runs workflows of Hadoop jobs.</summary>
     <description>Oozie workflows are actions arranged in a control dependency DAG (Direct
  Acyclic Graph).
  Oozie coordinator functionality allows to start workflows at regular
@@ -238,11 +238,14 @@ Java Servlet and JavaServer Pages techno
 <oozie-client>
   <metadata>
     <summary>Client for Oozie Workflow Engine</summary>
-    <description>Command line utility that allows
- remote access and operation of oozie. Using this utility, the
- user can deploy workflows and perform other administrative and
- monitoring tasks such as start, stop, kill, resume workflows
- and coordinator jobs.</description>
+    <description>Oozie client is a command line client utility that allows remote
+administration and monitoring of workflows. Using this client
+utility you can submit workflows, start/suspend/resume/kill
+workflows and find out their status at any instance. Apart from
+such operations, you can also change the status of the entire
+system and get version information. This client utility also allows
+you to validate any workflows before they are deployed to the Oozie
+server.</description>
     <url>http://incubator.apache.org/oozie/</url>
   </metadata>
   <alternatives>
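
Besides the command line utility described above, the same submit and monitor operations are available through the Java OozieClient API. A minimal sketch, assuming an Oozie server at a placeholder URL and a workflow application already staged in HDFS:

    // A minimal sketch of driving Oozie from Java instead of the CLI.
    // The server URL and HDFS paths below are placeholders.
    import java.util.Properties;
    import org.apache.oozie.client.OozieClient;
    import org.apache.oozie.client.WorkflowJob;

    public class SubmitWorkflow {
        public static void main(String[] args) throws Exception {
            OozieClient oozie = new OozieClient("http://oozie-host:11000/oozie");

            Properties conf = oozie.createConfiguration();
            conf.setProperty(OozieClient.APP_PATH, "hdfs://namenode:8020/user/me/my-wf");
            conf.setProperty("jobTracker", "jobtracker-host:8021");
            conf.setProperty("nameNode", "hdfs://namenode:8020");

            String jobId = oozie.run(conf);                 // submit and start the workflow
            System.out.println("Submitted " + jobId);

            WorkflowJob job = oozie.getJobInfo(jobId);      // poll its status
            System.out.println("Status: " + job.getStatus());
        }
    }
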
@@ -284,7 +287,7 @@ Java Servlet and JavaServer Pages techno
 </zookeeper>
 <zookeeper-server>
   <metadata>
-    <summary>This runs the zookeeper server on startup.</summary>
+    <summary>The Hadoop ZooKeeper server</summary>
     <description>This package starts the zookeeper server on startup</description>
     <url>http://zookeeper.apache.org/</url>
   </metadata>
@@ -301,7 +304,7 @@ Java Servlet and JavaServer Pages techno
 </zookeeper-server>
 <pig>
   <metadata>
-    <summary>A platform for analyzing large data sets using Hadoop</summary>
+    <summary>Pig is a platform for analyzing large data sets</summary>
     <description>Pig is a platform for analyzing large data sets that consists of a high-level language
  for expressing data analysis programs, coupled with infrastructure for evaluating these
  programs. The salient property of Pig programs is that their structure is amenable
@@ -339,7 +342,7 @@ Java Servlet and JavaServer Pages techno
 </pig>
 <hive>
   <metadata>
-    <summary>A data warehouse infrastructure built on top of Hadoop</summary>
+    <summary>Hive is a data warehouse infrastructure built on top of Hadoop</summary>
     <description>Hive is a data warehouse infrastructure built on top of Hadoop that
  provides tools to enable easy data summarization, adhoc querying and
  analysis of large datasets data stored in Hadoop files. It provides a
@@ -368,7 +371,7 @@ Java Servlet and JavaServer Pages techno
 <hive-metastore>
   <metadata>
     <summary>Shared metadata repository for Hive.</summary>
-    <description>This optional package hosts a metadata server for Sqoop clients across a network to use.</description>
+    <description>This optional package hosts a metadata server for Hive clients across a network to use.</description>
     <url>http://hive.apache.org/</url>
   </metadata>
   <deps>
@@ -389,8 +392,8 @@ Java Servlet and JavaServer Pages techno
 </hive-metastore>
 <hive-server>
   <metadata>
-    <summary>Shared metadata repository for Hive.</summary>
-    <description>This optional package hosts a metadata server for Sqoop clients across a network to use.</description>
+    <summary>Provides a Hive Thrift service.</summary>
+    <description>This optional package hosts a Thrift server for Hive clients across a network to use.</description>
     <url>http://hive.hadoop.apache.org/</url>
   </metadata>
   <deps>
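
The corrected hive-server entry describes the Hive Thrift service; clients of this era usually reach it through the HiveServer JDBC driver. A minimal sketch, assuming a server listening on the default port 10000; the host name is a placeholder:

    // A rough sketch of talking to the Hive Thrift service through the
    // HiveServer1-era JDBC driver; the host name is a placeholder.
    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class HiveQuery {
        public static void main(String[] args) throws Exception {
            Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
            Connection conn = DriverManager.getConnection(
                    "jdbc:hive://hive-host:10000/default", "", "");
            Statement stmt = conn.createStatement();
            ResultSet rs = stmt.executeQuery("SHOW TABLES");
            while (rs.next()) {
                System.out.println(rs.getString(1));
            }
            conn.close();
        }
    }
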
@@ -411,7 +414,7 @@ Java Servlet and JavaServer Pages techno
 </hive-server>
 <hbase>
   <metadata>
-    <summary>HBase is the Hadoop database</summary>
+    <summary>HBase is the Hadoop database. Use it when you need random, realtime read/write access to your Big Data. This project's goal is the hosting of very large tables -- billions of rows X millions of columns -- atop clusters of commodity hardware.</summary>
     <description>Use it when you need random, realtime read/write access to your Big Data.
  This project's goal is the hosting of very large tables -- billions of rows
  X millions of columns -- atop clusters of commodity hardware.</description>
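
The expanded summary emphasizes random, realtime read/write access, which is what the HBase Java client provides. A minimal sketch using the client API of this era; the table 't1' with column family 'cf' is assumed to already exist:

    // A minimal sketch of random read/write against HBase with the
    // client API of this era; the table 't1' with family 'cf' is assumed to exist.
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Get;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.util.Bytes;

    public class HBaseReadWrite {
        public static void main(String[] args) throws Exception {
            Configuration conf = HBaseConfiguration.create();
            HTable table = new HTable(conf, "t1");

            Put put = new Put(Bytes.toBytes("row1"));       // write one cell
            put.add(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("value1"));
            table.put(put);

            Get get = new Get(Bytes.toBytes("row1"));       // read it back
            Result result = table.get(get);
            System.out.println(Bytes.toString(
                    result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("q1"))));

            table.close();
        }
    }
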
@@ -438,14 +441,14 @@ Java Servlet and JavaServer Pages techno
 </hbase>
 <hbase-doc>
   <metadata>
-    <summary>Documentation for HBase</summary>
+    <summary>HBase Documentation</summary>
     <description>This package contains the HBase manual and JavaDoc.</description>
     <url>http://hbase.apache.org/</url>
   </metadata>
 </hbase-doc>
 <hbase-master>
   <metadata>
-    <summary>HMaster is the "master server" for a HBase</summary>
+    <summary>The Hadoop HBase master server.</summary>
     <description>There is only one HMaster for a single HBase deployment.</description>
     <url>http://hbase.apache.org/</url>
   </metadata>
@@ -462,7 +465,7 @@ Java Servlet and JavaServer Pages techno
 </hbase-master>
 <hbase-regionserver>
   <metadata>
-    <summary>HRegionServer makes a set of HRegions available to clients</summary>
+    <summary>The Hadoop HBase RegionServer.</summary>
     <description>It checks in with the HMaster. There are many HRegionServers in a single
  HBase deployment.</description>
     <url>http://hbase.apache.org/</url>
@@ -480,7 +483,7 @@ Java Servlet and JavaServer Pages techno
 </hbase-regionserver>
 <hbase-thrift>
   <metadata>
-    <summary>Provides an HBase Thrift service</summary>
+    <summary>The Hadoop HBase Thrift Interface</summary>
     <description>This package provides a Thrift service interface to the HBase distributed
  database.</description>
     <url>http://hbase.apache.org/</url>
@@ -516,26 +519,25 @@ Java Servlet and JavaServer Pages techno
 <hadoop>
   <metadata>
     <summary>Hadoop is a software platform for processing vast amounts of data</summary>
-    <description>A software platform for processing vast amounts of data
- Hadoop is a software platform that lets one easily write and
- run applications that process vast amounts of data.
- .
- Here's what makes Hadoop especially useful:
-  * Scalable: Hadoop can reliably store and process petabytes.
-  * Economical: It distributes the data and processing across clusters
-                of commonly available computers. These clusters can number
-                into the thousands of nodes.
-  * Efficient: By distributing the data, Hadoop can process it in parallel
-               on the nodes where the data is located. This makes it
-               extremely rapid.
-  * Reliable: Hadoop automatically maintains multiple copies of data and
-              automatically redeploys computing tasks based on failures.
- .
- Hadoop implements MapReduce, using the Hadoop Distributed File System (HDFS).
- MapReduce divides applications into many small blocks of work. HDFS creates
- multiple replicas of data blocks for reliability, placing them on compute
- nodes around the cluster. MapReduce can then process the data where it is
- located.</description>
+    <description>Hadoop is a software platform that lets one easily write and
+run applications that process vast amounts of data.
+
+Here's what makes Hadoop especially useful:
+* Scalable: Hadoop can reliably store and process petabytes.
+* Economical: It distributes the data and processing across clusters
+              of commonly available computers. These clusters can number
+              into the thousands of nodes.
+* Efficient: By distributing the data, Hadoop can process it in parallel
+             on the nodes where the data is located. This makes it
+             extremely rapid.
+* Reliable: Hadoop automatically maintains multiple copies of data and
+            automatically redeploys computing tasks based on failures.
+
+Hadoop implements MapReduce, using the Hadoop Distributed File System (HDFS).
+MapReduce divides applications into many small blocks of work. HDFS creates
+multiple replicas of data blocks for reliability, placing them on compute
+nodes around the cluster. MapReduce can then process the data where it is
+located.</description>
     <url>http://hadoop.apache.org/core/</url>
   </metadata>
   <deps>
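
The reworked description explains the MapReduce model: divide an application into small blocks of work and process the data where it is stored. The canonical illustration is a word count job; the following is a minimal, self-contained sketch of that shape using the org.apache.hadoop.mapreduce API:

    // The standard word-count shape, as a minimal sketch of the MapReduce model
    // described above: map over input splits where they are stored, then reduce.
    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class WordCount {
        public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
            private static final IntWritable ONE = new IntWritable(1);
            public void map(LongWritable key, Text value, Context ctx)
                    throws IOException, InterruptedException {
                for (String token : value.toString().split("\\s+")) {
                    if (!token.isEmpty()) {
                        ctx.write(new Text(token), ONE);   // emit (word, 1)
                    }
                }
            }
        }

        public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
            public void reduce(Text key, Iterable<IntWritable> values, Context ctx)
                    throws IOException, InterruptedException {
                int sum = 0;
                for (IntWritable v : values) {
                    sum += v.get();
                }
                ctx.write(key, new IntWritable(sum));      // emit (word, total)
            }
        }

        public static void main(String[] args) throws Exception {
            Job job = new Job(new Configuration(), "word count");
            job.setJarByClass(WordCount.class);
            job.setMapperClass(TokenMapper.class);
            job.setReducerClass(SumReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
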
@@ -552,10 +554,11 @@ Java Servlet and JavaServer Pages techno
 </hadoop>
 <hadoop-hdfs>
   <metadata>
-    <summary>Hadoop Pipes Library</summary>
-    <description>Interface to author Hadoop MapReduce jobs in C++
- Contains Hadoop Pipes, a library which allows Hadoop MapReduce jobs to be
- written in C++.</description>
+    <summary>The Hadoop Distributed File System</summary>
+    <description>Hadoop Distributed File System (HDFS) is the primary storage system used by
+Hadoop applications. HDFS creates multiple replicas of data blocks and distributes
+them on compute nodes throughout a cluster to enable reliable, extremely rapid
+computations.</description>
     <url>http://hadoop.apache.org/core/</url>
   </metadata>
   <deps>
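
The corrected hadoop-hdfs entry describes HDFS as the primary storage system; applications reach it through the FileSystem API. A minimal sketch of a write/read round trip; the namenode URI and the path are placeholders:

    // A minimal sketch of writing and reading a file through the HDFS
    // FileSystem API; the file system URI and path are placeholders.
    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class HdfsRoundTrip {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://namenode:8020");   // placeholder namenode
            FileSystem fs = FileSystem.get(conf);

            Path path = new Path("/tmp/hello.txt");
            FSDataOutputStream out = fs.create(path, true);     // HDFS replicates the blocks
            out.writeBytes("hello, hdfs\n");
            out.close();

            BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(path)));
            System.out.println(in.readLine());
            in.close();
            fs.close();
        }
    }
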
@@ -570,10 +573,20 @@ Java Servlet and JavaServer Pages techno
 </hadoop-hdfs>
 <hadoop-yarn>
   <metadata>
-    <summary>Hadoop Pipes Library</summary>
-    <description>Interface to author Hadoop MapReduce jobs in C++
- Contains Hadoop Pipes, a library which allows Hadoop MapReduce jobs to be
- written in C++.</description>
+    <summary>The Hadoop NextGen MapReduce (YARN)</summary>
+    <description>YARN (Hadoop NextGen MapReduce) is a general purpose data-computation framework.
+The fundamental idea of YARN is to split up the two major functionalities of the
+JobTracker, resource management and job scheduling/monitoring, into separate daemons:
+ResourceManager and NodeManager.
+
+The ResourceManager is the ultimate authority that arbitrates resources among all
+the applications in the system. The NodeManager is a per-node slave managing allocation
+of computational resources on a single node. Both work in support of per-application
+ApplicationMaster (AM).
+
+An ApplicationMaster is, in effect, a framework specific library and is tasked with
+negotiating resources from the ResourceManager and working with the NodeManager(s) to
+execute and monitor the tasks.</description>
     <url>http://hadoop.apache.org/core/</url>
   </metadata>
   <deps>
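
The new description outlines the ResourceManager/NodeManager/ApplicationMaster split. The ResourceManager can also be queried programmatically; a minimal sketch using the Hadoop 2 YarnClient API, assuming a yarn-site.xml on the classpath that points at a running ResourceManager:

    // A small sketch of querying the ResourceManager described above via the
    // YarnClient API; assumes yarn-site.xml on the classpath points at a live RM.
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.api.records.ApplicationReport;
    import org.apache.hadoop.yarn.client.api.YarnClient;

    public class ListYarnApps {
        public static void main(String[] args) throws Exception {
            YarnClient yarn = YarnClient.createYarnClient();
            yarn.init(new Configuration());
            yarn.start();

            for (ApplicationReport app : yarn.getApplications()) {
                System.out.println(app.getApplicationId() + "\t"
                        + app.getName() + "\t"
                        + app.getYarnApplicationState());
            }

            yarn.stop();
        }
    }
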
@@ -588,10 +601,9 @@ Java Servlet and JavaServer Pages techno
 </hadoop-yarn>
 <hadoop-mapreduce>
   <metadata>
-    <summary>Hadoop Pipes Library</summary>
-    <description>Interface to author Hadoop MapReduce jobs in C++
- Contains Hadoop Pipes, a library which allows Hadoop MapReduce jobs to be
- written in C++.</description>
+    <summary>The Hadoop MapReduce (MRv2)</summary>
+    <description>Hadoop MapReduce is a programming model and software framework for writing applications
+that rapidly process vast amounts of data in parallel on large clusters of compute nodes.</description>
     <url>http://hadoop.apache.org/core/</url>
   </metadata>
   <deps>
@@ -606,10 +618,9 @@ Java Servlet and JavaServer Pages techno
 </hadoop-mapreduce>
 <hadoop-httpfs>
   <metadata>
-    <summary>Native libraries for Hadoop Compression</summary>
-    <description>Native libraries for Hadoop (e.g., compression)
- This optional package contains native libraries that increase the performance
- of Hadoop's compression.</description>
+    <summary>HTTPFS for Hadoop</summary>
+    <description>The server providing HTTP REST API support for the complete FileSystem/FileContext
+interface in HDFS.</description>
     <url>http://hadoop.apache.org/core/</url>
   </metadata>
   <deps>
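
The new hadoop-httpfs entry describes an HTTP REST gateway to the FileSystem interface; it speaks the WebHDFS-style REST protocol. A minimal sketch of one call from plain Java, assuming the usual HttpFS port 14000; host, path, and user.name are placeholders:

    // A rough sketch of one WebHDFS-style REST call against an HttpFS gateway;
    // host, port, path, and user.name are placeholders.
    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.URL;

    public class HttpFsListStatus {
        public static void main(String[] args) throws Exception {
            URL url = new URL(
                    "http://httpfs-host:14000/webhdfs/v1/tmp?op=LISTSTATUS&user.name=hdfs");
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");

            BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream()));
            String line;
            while ((line = in.readLine()) != null) {
                System.out.println(line);                  // JSON listing of /tmp
            }
            in.close();
            conn.disconnect();
        }
    }
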
@@ -632,9 +643,8 @@ Java Servlet and JavaServer Pages techno
 <hadoop-hdfs-namenode>
   <metadata>
     <summary>The Hadoop namenode manages the block locations of HDFS files</summary>
-    <description>Name Node for Hadoop
- The Hadoop Distributed Filesystem (HDFS) requires one unique server, the
- namenode, which manages the block locations of files on the filesystem.</description>
+    <description>The Hadoop Distributed Filesystem (HDFS) requires one unique server, the
+namenode, which manages the block locations of files on the filesystem.</description>
     <url>http://hadoop.apache.org/core/</url>
   </metadata>
   <deps>
@@ -672,10 +682,9 @@ election.</description>
 <hadoop-hdfs-secondarynamenode>
   <metadata>
     <summary>Hadoop Secondary namenode</summary>
-    <description>Secondary Name Node for Hadoop
- The Secondary Name Node is responsible for checkpointing file system images.
- It is _not_ a failover pair for the namenode, and may safely be run on the
- same machine.</description>
+    <description>The Secondary Name Node periodically compacts the Name Node EditLog
+into a checkpoint.  This compaction ensures that Name Node restarts
+do not incur unnecessary downtime.</description>
     <url>http://hadoop.apache.org/core/</url>
   </metadata>
   <deps>
@@ -692,10 +701,9 @@ election.</description>
 <hadoop-hdfs-datanode>
   <metadata>
     <summary>Hadoop Data Node</summary>
-    <description>Data Node for Hadoop
- The Data Nodes in the Hadoop Cluster are responsible for serving up
- blocks of data over the network to Hadoop Distributed Filesystem
- (HDFS) clients.</description>
+    <description>The Data Nodes in the Hadoop Cluster are responsible for serving up
+blocks of data over the network to Hadoop Distributed Filesystem
+(HDFS) clients.</description>
     <url>http://hadoop.apache.org/core/</url>
   </metadata>
   <deps>
@@ -711,12 +719,8 @@ election.</description>
 </hadoop-hdfs-datanode>
 <hadoop-yarn-resourcemanager>
   <metadata>
-    <summary>Hadoop Job Tracker</summary>
-    <description>Job Tracker for Hadoop
- The jobtracker is a central service which is responsible for managing
- the tasktracker services running on all nodes in a Hadoop Cluster.
- The jobtracker allocates work to the tasktracker nearest to the data
- with an available work slot.</description>
+    <summary>Yarn Resource Manager</summary>
+    <description>The resource manager manages the global assignment of compute resources to applications.</description>
     <url>http://hadoop.apache.org/core/</url>
   </metadata>
   <deps>
@@ -732,11 +736,10 @@ election.</description>
 </hadoop-yarn-resourcemanager>
 <hadoop-yarn-nodemanager>
   <metadata>
-    <summary>Hadoop Task Tracker</summary>
-    <description>Task Tracker for Hadoop
- The Task Tracker is the Hadoop service that accepts MapReduce tasks and
- computes results. Each node in a Hadoop cluster that should be doing
- computation should run a Task Tracker.</description>
+    <summary>Yarn Node Manager</summary>
+    <description>The NodeManager is the per-machine framework agent who is responsible for
+containers, monitoring their resource usage (cpu, memory, disk, network) and
+reporting the same to the ResourceManager/Scheduler.</description>
     <url>http://hadoop.apache.org/core/</url>
   </metadata>
   <deps>
@@ -752,11 +755,8 @@ election.</description>
 </hadoop-yarn-nodemanager>
 <hadoop-yarn-proxyserver>
   <metadata>
-    <summary>Hadoop Task Tracker</summary>
-    <description>Task Tracker for Hadoop
- The Task Tracker is the Hadoop service that accepts MapReduce tasks and
- computes results. Each node in a Hadoop cluster that should be doing
- computation should run a Task Tracker.</description>
+    <summary>Yarn Web Proxy</summary>
+    <description>The web proxy server sits in front of the YARN application master web UI.</description>
     <url>http://hadoop.apache.org/core/</url>
   </metadata>
   <deps>
@@ -772,11 +772,8 @@ election.</description>
 </hadoop-yarn-proxyserver>
 <hadoop-mapreduce-historyserver>
   <metadata>
-    <summary>Hadoop Task Tracker</summary>
-    <description>Task Tracker for Hadoop
- The Task Tracker is the Hadoop service that accepts MapReduce tasks and
- computes results. Each node in a Hadoop cluster that should be doing
- computation should run a Task Tracker.</description>
+    <summary>MapReduce History Server</summary>
+    <description>The History server keeps records of the different activities being performed on an Apache Hadoop cluster.</description>
     <url>http://hadoop.apache.org/core/</url>
   </metadata>
   <deps>
@@ -812,9 +809,7 @@ election.</description>
 <hadoop-doc>
   <metadata>
     <summary>Hadoop Documentation</summary>
-    <description>Documentation for Hadoop
- This package contains the Java Documentation for Hadoop and its relevant
- APIs.</description>
+    <description>Documentation for Hadoop</description>
     <url>http://hadoop.apache.org/core/</url>
   </metadata>
 </hadoop-doc>