You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2015/02/03 02:29:24 UTC

svn commit: r1656615 - in /nutch/branches/2.x: ./ docker/hbase/ docker/hbase/config/

Author: lewismc
Date: Tue Feb  3 01:29:23 2015
New Revision: 1656615

URL: http://svn.apache.org/r1656615
Log:
NUTCH-1924 Nutch + HBase Docker

Added:
    nutch/branches/2.x/docker/hbase/
    nutch/branches/2.x/docker/hbase/Dockerfile
    nutch/branches/2.x/docker/hbase/README.md
    nutch/branches/2.x/docker/hbase/config/
    nutch/branches/2.x/docker/hbase/config/bashrc
    nutch/branches/2.x/docker/hbase/config/core-site.xml
    nutch/branches/2.x/docker/hbase/config/hbase-site.xml
    nutch/branches/2.x/docker/hbase/config/hdfs-site.xml
    nutch/branches/2.x/docker/hbase/config/mapred-site.xml
    nutch/branches/2.x/docker/hbase/config/nutch-site.xml
    nutch/branches/2.x/docker/hbase/config/run-services.sh
    nutch/branches/2.x/docker/hbase/config/ssh-config
    nutch/branches/2.x/docker/hbase/config/yarn-site.xml
Modified:
    nutch/branches/2.x/CHANGES.txt

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1656615&r1=1656614&r2=1656615&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Tue Feb  3 01:29:23 2015
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development 2.4-SNAPSHOT
 
+* NUTCH-1924 Nutch + HBase Docker (Radosław Stankiewicz via lewismc)
+
 * NUTCH-1920 Upgrade Nutch to use Java 1.7 (lewismc)
 
 * NUTCH-1893 Parse-tika failes to parse feed files (Mengying Wang via snagel)

Added: nutch/branches/2.x/docker/hbase/Dockerfile
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/Dockerfile?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/Dockerfile (added)
+++ nutch/branches/2.x/docker/hbase/Dockerfile Tue Feb  3 01:29:23 2015
@@ -0,0 +1,145 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM stackbrew/ubuntu:saucy
+MAINTAINER Radoslaw Stankiewicz <rr...@gmail.com>
+
+WORKDIR /root/
+
+# Install software-properties-common, the package that provides add-apt-repository
+RUN apt-get update && apt-get install -y software-properties-common
+
+# Enable the multiverse and restricted repositories plus the webupd8team Java PPA, then upgrade installed packages
+RUN add-apt-repository -y multiverse && \
+  add-apt-repository -y restricted && \
+  add-apt-repository -y ppa:webupd8team/java && \
+  apt-get update && apt-get upgrade -y
+
+# Pre-accept the Oracle license through debconf so the Java 7 installer from the PPA runs unattended
+RUN echo oracle-java7-installer shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections && \
+  apt-get install -y oracle-java7-installer oracle-java7-set-default
+
+# Install build tools (ant, maven, build-essential, subversion) and services/utilities (sshd, ZooKeeper, vim, ...)
+RUN apt-get install -y ant openssh-server zookeeperd vim telnet subversion rsync curl build-essential maven
+
+# Download Hadoop 2.4.0, the HBase 0.94.14 sources and protobuf 2.5.0 into /root, and check out the Nutch 2.x branch
+RUN wget -q 'https://archive.apache.org/dist/hadoop/core/hadoop-2.4.0/hadoop-2.4.0.tar.gz'
+RUN wget -q 'https://github.com/apache/hbase/archive/0.94.14.tar.gz' && mv 0.94.14.tar.gz hbase-0.94.14.tar.gz
+RUN wget -q 'https://protobuf.googlecode.com/files/protobuf-2.5.0.tar.gz'
+RUN svn checkout http://svn.apache.org/repos/asf/nutch/branches/2.x/ nutch-sources
+
+# Create the hadoop group and the hduser account that owns and runs Hadoop (no prompts: docker build has no terminal for adduser to ask on)
+RUN addgroup hadoop && adduser --disabled-password --gecos "" --ingroup hadoop hduser
+RUN usermod -a -G hadoop zookeeper
+
+# Generate an RSA key pair for hduser and authorize it, giving passwordless SSH access as hduser
+RUN su -l -c 'ssh-keygen -t rsa -f /home/hduser/.ssh/id_rsa -P ""' hduser && \
+  cat /home/hduser/.ssh/id_rsa.pub | su -l -c 'tee -a /home/hduser/.ssh/authorized_keys' hduser
+ADD config/ssh-config /home/hduser/.ssh/config
+RUN chmod 600 /home/hduser/.ssh/config
+RUN chown hduser /home/hduser/.ssh/config
+
+# Fix Ubuntu 13.10 SSH daemon problem with docker (make pam_loginuid optional): http://docs.docker.io/en/latest/examples/running_ssh_service/
+RUN sed -ri 's/session[[:blank:]]+required[[:blank:]]+pam_loginuid.so/session optional pam_loginuid.so/g' /etc/pam.d/sshd
+
+# Unpack Hadoop into /opt, symlink it as /opt/hadoop and create a logs directory owned by hduser
+RUN tar xvfz /root/hadoop-2.4.0.tar.gz -C /opt && \
+  ln -s /opt/hadoop-2.4.0 /opt/hadoop && \
+  chown -R root:root /opt/hadoop-2.4.0 && \
+  mkdir /opt/hadoop-2.4.0/logs && \
+  chown -R hduser:hadoop /opt/hadoop-2.4.0/logs
+
+# Unpack, compile, self-test (make check) and install Google protobuf 2.5.0
+RUN tar xvfz /root/protobuf-2.5.0.tar.gz -C /opt && \
+  chown -R root:root /opt/protobuf-2.5.0
+RUN cd /opt/protobuf-2.5.0 && ./configure && make && make check && make install
+
+# Unpack the HBase 0.94.14 sources into /opt
+RUN tar xvfz /root/hbase-0.94.14.tar.gz -C /opt && \
+  chown -R root:root /opt/hbase-0.94.14
+# HBase has to be compiled from sources (http://hbase.apache.org/book.html#basic.prerequisites, 4.1.1); pom.xml versions are rewritten first (presumably protobuf and Hadoop - confirm)
+RUN vim -c '%s/2.4.0a/2.5.0/g' -c '%s/2.0.0-alpha/2.4.0' -c 'x' /opt/hbase-0.94.14/pom.xml
+RUN cd /opt/hbase-0.94.14/ && mvn clean install assembly:single -Dhadoop.profile=2.0 -DskipTests
+
+# Link the assembled HBase build as /opt/hbase and create a logs directory owned by hduser
+RUN ln -s /opt/hbase-0.94.14/target/hbase-0.94.14/hbase-0.94.14 /opt/hbase &&  mkdir /opt/hbase/logs && \
+  chown -R hduser:hadoop /opt/hbase/logs
+
+ # Move the Nutch checkout to /opt/apache-nutch-2.x and create a logs directory owned by hduser
+RUN mv /root/nutch-sources /opt/apache-nutch-2.x && \
+  chown -R root:root /opt/apache-nutch-2.x && \
+  mkdir /opt/apache-nutch-2.x/logs && \
+  chown -R hduser:hadoop /opt/apache-nutch-2.x/logs
+  
+# Install the login environment (JAVA_HOME, Hadoop paths, PATH) for hduser
+ADD config/bashrc /home/hduser/.bashrc
+
+# Stage the Hadoop, HBase and Nutch site configs under /tmp, then move them into each product's configuration directory
+ADD config/core-site.xml /tmp/hadoop-etc/core-site.xml
+ADD config/mapred-site.xml /tmp/hadoop-etc/mapred-site.xml
+ADD config/hdfs-site.xml /tmp/hadoop-etc/hdfs-site.xml
+ADD config/yarn-site.xml /tmp/hadoop-etc/yarn-site.xml
+ADD config/hbase-site.xml /tmp/hbase-etc/hbase-site.xml
+ADD config/nutch-site.xml /tmp/nutch-etc/nutch-site.xml
+RUN mv /tmp/hadoop-etc/* /opt/hadoop/etc/hadoop
+RUN mv /tmp/hbase-etc/* /opt/hbase/conf/
+RUN mv /tmp/nutch-etc/* /opt/apache-nutch-2.x/conf/
+
+# Make HBaseStore the default Gora datastore, delete the lines directly after and before the gora-hbase entry in ivy.xml (presumably its comment markers - confirm), then build the Nutch runtime
+ENV NUTCH_ROOT /opt/apache-nutch-2.x
+RUN echo 'gora.datastore.default=org.apache.gora.hbase.store.HBaseStore' >> /opt/apache-nutch-2.x/conf/gora.properties
+RUN vim -c 'g/name="gora-hbase"/+1d' -c 'x' $NUTCH_ROOT/ivy/ivy.xml
+RUN vim -c 'g/name="gora-hbase"/-1d' -c 'x' $NUTCH_ROOT/ivy/ivy.xml
+RUN cd $NUTCH_ROOT && ant runtime
+RUN ln -s /opt/apache-nutch-2.x/runtime/local /opt/nutch
+ENV NUTCH_HOME /opt/nutch
+ENV HADOOP_HOME /opt/hadoop
+ENV NUTCHSERVER_PORT 8899
+
+
+# Expose service ports; the list is probably incomplete and still contains some old (pre Hadoop 2.0) ports
+# Nutch server (matches NUTCHSERVER_PORT)
+EXPOSE 8899
+
+# Expose SSHD
+EXPOSE 22
+
+# QuorumPeerMain (Zookeeper)
+EXPOSE 2181 39534
+
+# NameNode (HDFS)
+EXPOSE 8020 50070 9000
+
+# DataNode (HDFS)
+EXPOSE 50010 50020 50075
+
+# SecondaryNameNode (HDFS)
+EXPOSE 50090
+
+# Trackers
+EXPOSE 50030 50060
+
+# HBase: master (60000), master web UI (60010), region server (60020), region server web UI (60030)
+EXPOSE 60000 60010 60020 60030
+
+# Thrift
+EXPOSE 9090 9095
+
+
+# Install the service start script, make it executable and run it as the container's default command
+ADD config/run-services.sh /root/run-services.sh
+RUN chmod +x /root/run-services.sh
+
+CMD ["/root/run-services.sh"]

Added: nutch/branches/2.x/docker/hbase/README.md
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/README.md?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/README.md (added)
+++ nutch/branches/2.x/docker/hbase/README.md Tue Feb  3 01:29:23 2015
@@ -0,0 +1,59 @@
+# Nutch Dockerfile #
+
+This directory contains the Dockerfile of [Nutch](http://nutch.apache.org) for [Docker](https://www.docker.com/)'s [automated build](https://registry.hub.docker.com/u/nutch/nutch_with_hbase_hadoop/) published to the [Hub Registry](https://registry.hub.docker.com/).
+
+## What is Nutch?
+
+![Nutch logo](https://wiki.apache.org/nutch/FrontPage?action=AttachFile&do=get&target=nutch_logo_medium.gif "Nutch")
+
+Apache Nutch is a highly extensible and scalable open source web crawler software project. 
+
+Nutch can run on a single machine, but gains a lot of its strength from running in a Hadoop cluster.
+
+## Docker Image
+
+Current configuration of this image consists of components:
+	
+*	Hadoop 2.4.0
+*	HBase 0.94.14
+*	Nutch 2.x
+
+##  Base Image
+
+* [stackbrew/ubuntu:saucy](https://registry.hub.docker.com/u/stackbrew/ubuntu/)
+
+
+## Installation
+
+1. Install [Docker](https://www.docker.com/).
+
+2a. Download automated build from public hub registry `docker pull nutch/nutch_with_hbase_hadoop`
+
+2b. Build from files in this directory:
+
+	$(boot2docker shellinit)
+	docker build -t <new name for image> .
+
+## Usage
+
+Start docker
+ 
+	boot2docker up
+	$(boot2docker shellinit)
+
+Start a container in the background and enter a shell. The first command starts the image and stores the printed container ID in IMAGE_PID; use `docker logs -f $IMAGE_PID` to follow the standard logs.
+
+	IMAGE_PID=$(docker run -d nutch_with_hbase_hadoop)
+	docker exec -i -t $IMAGE_PID bash
+
+
+Nutch is located in /opt/nutch/ and is almost ready to run.
+Review configuration in /opt/nutch/conf/ and you can start crawling.
+
+	echo 'http://nutch.apache.org' > seed.txt
+	/opt/nutch/bin/nutch inject seed.txt
+	/opt/nutch/bin/nutch generate -topN 10    # this will print the batchId
+	/opt/nutch/bin/nutch fetch <batchId>
+	/opt/nutch/bin/nutch parse <batchId>
+	/opt/nutch/bin/nutch updatedb <batchId>
+	[...]

Added: nutch/branches/2.x/docker/hbase/config/bashrc
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/bashrc?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/bashrc (added)
+++ nutch/branches/2.x/docker/hbase/config/bashrc Tue Feb  3 01:29:23 2015
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+export JAVA_HOME=/usr/lib/jvm/java-7-oracle/  # JDK location; assumed to be where the oracle-java7-installer package puts it - TODO confirm
+export HADOOP_PREFIX=/opt/hadoop  # symlink to /opt/hadoop-2.4.0 created by the Dockerfile
+export HADOOP_HOME=/opt/hadoop
+export HADOOP_COMMON_HOME=$HADOOP_PREFIX
+export HADOOP_HDFS_HOME=$HADOOP_PREFIX
+export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
+
+export HADOOP_CONF_DIR=$HADOOP_HDFS_HOME/etc/hadoop  # directory the Dockerfile moves the Hadoop *-site.xml files into
+
+export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_PREFIX/lib/native
+export HADOOP_OPTS="-Djava.library.path=$HADOOP_PREFIX/lib -Djava.net.preferIPv4Stack=true"
+
+export PATH=$PATH:$HADOOP_PREFIX/bin:$HADOOP_PREFIX/sbin
+export HBASE_MANAGES_ZK=false  # ZooKeeper is started separately by run-services.sh

Added: nutch/branches/2.x/docker/hbase/config/core-site.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/core-site.xml?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/core-site.xml (added)
+++ nutch/branches/2.x/docker/hbase/config/core-site.xml Tue Feb  3 01:29:23 2015
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<configuration> <!-- Default filesystem and local storage roots for the single-node cluster, using the Hadoop 2 property names -->
+  <property>
+    <name>fs.defaultFS</name>
+    <value>hdfs://localhost:9000</value>
+  </property>
+  <property>
+    <name>hadoop.tmp.dir</name>
+    <value>/home/hduser/data/hadoop/tmp</value>
+  </property>
+  <property>
+    <name>dfs.namenode.checkpoint.dir</name>
+    <value>/home/hduser/data/hadoop/snn</value>
+  </property>
+</configuration>

Added: nutch/branches/2.x/docker/hbase/config/hbase-site.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/hbase-site.xml?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/hbase-site.xml (added)
+++ nutch/branches/2.x/docker/hbase/config/hbase-site.xml Tue Feb  3 01:29:23 2015
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<configuration> <!-- Distributed-mode HBase storing its data under hdfs://localhost:9000/hbase, with the ZooKeeper quorum on localhost -->
+ <property>
+    <name>hbase.rootdir</name>
+    <value>hdfs://localhost:9000/hbase</value>
+  </property>
+  <property>
+    <name>hbase.cluster.distributed</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hbase.zookeeper.quorum</name>
+    <value>localhost</value>
+  </property>
+  <property>
+    <name>dfs.replication</name>
+    <value>1</value>
+  </property>
+</configuration>

Added: nutch/branches/2.x/docker/hbase/config/hdfs-site.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/hdfs-site.xml?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/hdfs-site.xml (added)
+++ nutch/branches/2.x/docker/hbase/config/hdfs-site.xml Tue Feb  3 01:29:23 2015
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<configuration> <!-- Single-replica HDFS with NameNode/DataNode storage under /home/hduser/data/hadoop, using the Hadoop 2 property names -->
+  <property>
+    <name>dfs.replication</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>dfs.namenode.name.dir</name>
+    <value>/home/hduser/data/hadoop/nn</value>
+  </property>
+  <property>
+    <name>dfs.datanode.data.dir</name>
+    <value>/home/hduser/data/hadoop/dn</value>
+  </property>
+  <property>
+    <name>dfs.permissions.superusergroup</name>
+    <value>hadoop</value>
+  </property>
+</configuration>

Added: nutch/branches/2.x/docker/hbase/config/mapred-site.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/mapred-site.xml?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/mapred-site.xml (added)
+++ nutch/branches/2.x/docker/hbase/config/mapred-site.xml Tue Feb  3 01:29:23 2015
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<configuration> <!-- Selects YARN as the MapReduce execution framework -->
+    <property>
+        <name>mapreduce.framework.name</name>
+        <value>yarn</value>
+    </property>
+</configuration>

Added: nutch/branches/2.x/docker/hbase/config/nutch-site.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/nutch-site.xml?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/nutch-site.xml (added)
+++ nutch/branches/2.x/docker/hbase/config/nutch-site.xml Tue Feb  3 01:29:23 2015
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<configuration> <!-- Crawler agent name (placeholder value) and the Gora HBase store as Nutch's storage backend -->
+  <property>
+    <name>http.agent.name</name>
+    <value>your-crawler-name</value>
+  </property>
+  <property>
+    <name>storage.data.store.class</name>
+    <value>org.apache.gora.hbase.store.HBaseStore</value>
+    <description>Default class for storing data</description>
+  </property>
+</configuration>

Added: nutch/branches/2.x/docker/hbase/config/run-services.sh
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/run-services.sh?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/run-services.sh (added)
+++ nutch/branches/2.x/docker/hbase/config/run-services.sh Tue Feb  3 01:29:23 2015
@@ -0,0 +1,50 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+
+# append ssh public key to authorized_keys file
+#echo $AUTHORIZED_SSH_PUBLIC_KEY >> /home/hduser/.ssh/authorized_keys
+
+# format the namenode if it's not already done
+su -l -c 'mkdir -p /home/hduser/data/hadoop/nn /home/hduser/data/hadoop/dn && /opt/hadoop/bin/hadoop namenode -format' hduser
+
+# start ssh daemon
+service ssh start
+
+# start zookeeper used for HDFS
+service zookeeper start
+
+# clear hadoop logs
+rm -fr /opt/hadoop/logs/*
+
+# start HDFS
+su -l -c '/opt/hadoop/sbin/start-dfs.sh' hduser
+
+# start YARN
+su -l -c '/opt/hadoop/sbin/start-yarn.sh' hduser
+
+#start HBASE
+su -l -c '/opt/hbase/bin/start-hbase.sh' hduser
+
+#start HBASE thrift
+su -l -c '/opt/hbase/bin/hbase thrift start > /opt/hbase/logs/thrift.log 2>&1 &' hduser
+
+sleep 1
+
+# tail log directory
+tail -n 1000 -f /opt/hadoop/logs/*.log /opt/hbase/logs/*.log /opt/nutch/logs/*.log

Added: nutch/branches/2.x/docker/hbase/config/ssh-config
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/ssh-config?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/ssh-config (added)
+++ nutch/branches/2.x/docker/hbase/config/ssh-config Tue Feb  3 01:29:23 2015
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Disable strict host key checking for every host; the Dockerfile installs this file as /home/hduser/.ssh/config
+Host *
+  StrictHostKeyChecking no

Added: nutch/branches/2.x/docker/hbase/config/yarn-site.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/yarn-site.xml?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/yarn-site.xml (added)
+++ nutch/branches/2.x/docker/hbase/config/yarn-site.xml Tue Feb  3 01:29:23 2015
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<configuration> <!-- YARN settings: shuffle auxiliary service, application classpath and a delayed cleanup of application directories -->
+    <property>
+        <name>yarn.nodemanager.aux-services</name>
+        <value>mapreduce_shuffle</value>
+    </property>
+    <!-- Application classpath: the Hadoop config directory plus the common, hdfs, mapreduce and yarn jars under /opt/hadoop/share/hadoop -->
+    <property>
+      <name>yarn.application.classpath</name>
+      <value>/opt/hadoop/etc/hadoop,/opt/hadoop/share/hadoop/common/*,/opt/hadoop/share/hadoop/common/lib/*,/opt/hadoop/share/hadoop/hdfs/*,/opt/hadoop/share/hadoop/hdfs/lib/*,/opt/hadoop/share/hadoop/mapreduce/*,/opt/hadoop/share/hadoop/mapreduce/lib/*,/opt/hadoop/share/hadoop/yarn/*,/opt/hadoop/share/hadoop/yarn/lib/*</value>
+    </property>
+
+    <property>
+    <description>
+      Number of seconds after an application finishes before the nodemanager's
+      DeletionService will delete the application's localized file directory
+      and log directory.
+
+      To diagnose Yarn application problems, set this property's value large
+      enough (for example, to 600 = 10 minutes) to permit examination of these
+      directories. After changing the property's value, you must restart the
+      nodemanager in order for it to have an effect.
+
+      The roots of Yarn applications' work directories is configurable with
+      the yarn.nodemanager.local-dirs property (see below), and the roots
+      of the Yarn applications' log directories is configurable with the
+      yarn.nodemanager.log-dirs property (see also below).
+    </description>
+    <name>yarn.nodemanager.delete.debug-delay-sec</name>
+    <value>600</value>
+  </property>
+
+</configuration>