You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2015/02/03 02:29:24 UTC
svn commit: r1656615 - in /nutch/branches/2.x: ./ docker/hbase/
docker/hbase/config/
Author: lewismc
Date: Tue Feb 3 01:29:23 2015
New Revision: 1656615
URL: http://svn.apache.org/r1656615
Log:
NUTCH-1924 Nutch + HBase Docker
Added:
nutch/branches/2.x/docker/hbase/
nutch/branches/2.x/docker/hbase/Dockerfile
nutch/branches/2.x/docker/hbase/README.md
nutch/branches/2.x/docker/hbase/config/
nutch/branches/2.x/docker/hbase/config/bashrc
nutch/branches/2.x/docker/hbase/config/core-site.xml
nutch/branches/2.x/docker/hbase/config/hbase-site.xml
nutch/branches/2.x/docker/hbase/config/hdfs-site.xml
nutch/branches/2.x/docker/hbase/config/mapred-site.xml
nutch/branches/2.x/docker/hbase/config/nutch-site.xml
nutch/branches/2.x/docker/hbase/config/run-services.sh
nutch/branches/2.x/docker/hbase/config/ssh-config
nutch/branches/2.x/docker/hbase/config/yarn-site.xml
Modified:
nutch/branches/2.x/CHANGES.txt
Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1656615&r1=1656614&r2=1656615&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Tue Feb 3 01:29:23 2015
@@ -2,6 +2,8 @@ Nutch Change Log
Current Development 2.4-SNAPSHOT
+* NUTCH-1924 Nutch + HBase Docker (Radosław Stankiewicz via lewismc)
+
* NUTCH-1920 Upgrade Nutch to use Java 1.7 (lewismc)
* NUTCH-1893 Parse-tika failes to parse feed files (Mengying Wang via snagel)
Added: nutch/branches/2.x/docker/hbase/Dockerfile
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/Dockerfile?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/Dockerfile (added)
+++ nutch/branches/2.x/docker/hbase/Dockerfile Tue Feb 3 01:29:23 2015
@@ -0,0 +1,145 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Base image: Ubuntu "saucy" from the stackbrew repository.
+FROM stackbrew/ubuntu:saucy
+MAINTAINER Radoslaw Stankiewicz <rr...@gmail.com>
+
+# Relative paths in the build steps below (downloads, svn checkout) resolve
+# against root's home directory.
+WORKDIR /root/
+
+# Install the package that provides add-apt-repository
+RUN apt-get update && apt-get install -y software-properties-common
+
+# Enable the extra Ubuntu components and the WebUpd8 Java PPA, then refresh
+# the package index and bring the base system up to date
+RUN add-apt-repository -y multiverse && \
+    add-apt-repository -y restricted && \
+    add-apt-repository -y ppa:webupd8team/java && \
+    apt-get update && apt-get upgrade -y
+
+# Install Oracle Java 7 from the PPA. The licence is pre-accepted through
+# debconf because the installer cannot prompt during a build. The package
+# index is refreshed in this same layer so that a cached "apt-get update"
+# layer can never leave the install working from stale package lists.
+RUN echo oracle-java7-installer shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections && \
+    apt-get update && \
+    apt-get install -y oracle-java7-installer oracle-java7-set-default
+
+# Install build and runtime dependencies, again refreshing the index in the
+# same layer. wget is listed explicitly because the download steps use it.
+RUN apt-get update && \
+    apt-get install -y ant build-essential curl maven openssh-server rsync subversion telnet vim wget zookeeperd
+
+# Fetch the Hadoop, HBase and protobuf archives into the working directory
+# and check out the Nutch 2.x branch as "nutch-sources"
+RUN wget -q 'https://archive.apache.org/dist/hadoop/core/hadoop-2.4.0/hadoop-2.4.0.tar.gz'
+RUN wget -q -O hbase-0.94.14.tar.gz 'https://github.com/apache/hbase/archive/0.94.14.tar.gz'
+RUN wget -q 'https://protobuf.googlecode.com/files/protobuf-2.5.0.tar.gz'
+RUN svn checkout http://svn.apache.org/repos/asf/nutch/branches/2.x/ nutch-sources
+
+# Create the "hadoop" group and the "hduser" account that owns and runs
+# Hadoop. --disabled-password and --gecos "" keep adduser from prompting for
+# a password and user details, which cannot be answered during "docker build".
+RUN addgroup hadoop && adduser --disabled-password --gecos "" --ingroup hadoop hduser
+# Add the zookeeper account (presumably created by the zookeeperd package)
+# to the hadoop group
+RUN usermod -a -G hadoop zookeeper
+
+# Generate a passphrase-less RSA key pair for hduser and append the public
+# key to hduser's own authorized_keys, giving passwordless SSH to this account
+RUN su -l -c 'ssh-keygen -t rsa -f /home/hduser/.ssh/id_rsa -P ""' hduser && \
+    su -l -c 'tee -a /home/hduser/.ssh/authorized_keys' hduser < /home/hduser/.ssh/id_rsa.pub
+# Install the SSH client config, owned by and readable only to hduser
+ADD config/ssh-config /home/hduser/.ssh/config
+RUN chmod 600 /home/hduser/.ssh/config && \
+    chown hduser /home/hduser/.ssh/config
+
+# Work around the Ubuntu 13.10 SSH daemon problem under docker by making
+# pam_loginuid optional: http://docs.docker.io/en/latest/examples/running_ssh_service/
+RUN sed -ri 's/session[[:blank:]]+required[[:blank:]]+pam_loginuid.so/session optional pam_loginuid.so/g' /etc/pam.d/sshd
+
+# Unpack Hadoop 2.4.0 under /opt as a root-owned tree, add a logs directory
+# writable by hduser, and expose the install as /opt/hadoop
+RUN tar -xzvf /root/hadoop-2.4.0.tar.gz -C /opt && \
+    chown -R root:root /opt/hadoop-2.4.0 && \
+    mkdir /opt/hadoop-2.4.0/logs && \
+    chown -R hduser:hadoop /opt/hadoop-2.4.0/logs && \
+    ln -s /opt/hadoop-2.4.0 /opt/hadoop
+
+# Extract the Google protobuf 2.5.0 sources under /opt, then configure,
+# build, self-test and install them
+RUN tar -xzvf /root/protobuf-2.5.0.tar.gz -C /opt && \
+    chown -R root:root /opt/protobuf-2.5.0
+RUN cd /opt/protobuf-2.5.0 && \
+    ./configure && \
+    make && \
+    make check && \
+    make install
+
+# Extract the HBase 0.94.14 sources under /opt as a root-owned tree
+RUN tar -xzvf /root/hbase-0.94.14.tar.gz -C /opt && \
+    chown -R root:root /opt/hbase-0.94.14
+# HBase has to be compiled from sources :( - see section 4.1.1 of
+# http://hbase.apache.org/book.html#basic.prerequisites
+# Rewrite the version strings in pom.xml first (2.4.0a -> 2.5.0 and
+# 2.0.0-alpha -> 2.4.0; presumably the protobuf and Hadoop versions), then
+# build with the Hadoop 2.0 profile, skipping tests.
+RUN vim -c '%s/2.4.0a/2.5.0/g' -c '%s/2.0.0-alpha/2.4.0' -c 'x' /opt/hbase-0.94.14/pom.xml
+RUN cd /opt/hbase-0.94.14/ && \
+    mvn clean install assembly:single -Dhadoop.profile=2.0 -DskipTests
+
+# Expose the assembled HBase distribution as /opt/hbase and give hduser a
+# writable logs directory inside it
+RUN ln -s /opt/hbase-0.94.14/target/hbase-0.94.14/hbase-0.94.14 /opt/hbase && \
+    mkdir /opt/hbase/logs && \
+    chown -R hduser:hadoop /opt/hbase/logs
+
+# Move the Nutch checkout to /opt as a root-owned tree with a logs directory
+# writable by hduser
+RUN mv /root/nutch-sources /opt/apache-nutch-2.x && \
+    chown -R root:root /opt/apache-nutch-2.x && \
+    mkdir /opt/apache-nutch-2.x/logs && \
+    chown -R hduser:hadoop /opt/apache-nutch-2.x/logs
+
+# Shell environment for hduser
+ADD config/bashrc /home/hduser/.bashrc
+
+# Stage the Hadoop, HBase and Nutch configuration files under /tmp ...
+ADD config/core-site.xml /tmp/hadoop-etc/core-site.xml
+ADD config/mapred-site.xml /tmp/hadoop-etc/mapred-site.xml
+ADD config/hdfs-site.xml /tmp/hadoop-etc/hdfs-site.xml
+ADD config/yarn-site.xml /tmp/hadoop-etc/yarn-site.xml
+ADD config/hbase-site.xml /tmp/hbase-etc/hbase-site.xml
+ADD config/nutch-site.xml /tmp/nutch-etc/nutch-site.xml
+# ... then move them into each product's configuration directory
+RUN mv /tmp/hadoop-etc/* /opt/hadoop/etc/hadoop && \
+    mv /tmp/hbase-etc/* /opt/hbase/conf/ && \
+    mv /tmp/nutch-etc/* /opt/apache-nutch-2.x/conf/
+
+
+# Root of the Nutch source tree
+ENV NUTCH_ROOT /opt/apache-nutch-2.x
+# Make HBase the default Gora datastore
+RUN echo 'gora.datastore.default=org.apache.gora.hbase.store.HBaseStore' >> $NUTCH_ROOT/conf/gora.properties
+# Delete the line after, then the line before, the gora-hbase dependency in
+# ivy.xml (presumably the comment markers that disable it - confirm against
+# the checked-out ivy.xml)
+RUN vim -c 'g/name="gora-hbase"/+1d' -c 'x' $NUTCH_ROOT/ivy/ivy.xml
+RUN vim -c 'g/name="gora-hbase"/-1d' -c 'x' $NUTCH_ROOT/ivy/ivy.xml
+# Build the Nutch runtime and expose the local runtime as /opt/nutch
+RUN cd $NUTCH_ROOT && ant runtime
+RUN ln -s $NUTCH_ROOT/runtime/local /opt/nutch
+ENV NUTCH_HOME /opt/nutch
+ENV HADOOP_HOME /opt/hadoop
+ENV NUTCHSERVER_PORT 8899
+
+
+# Expose ports. The list is probably incomplete and probably still contains
+# some old (pre Hadoop 2.0) ports as well.
+# Nutch server (same value as NUTCHSERVER_PORT)
+EXPOSE 8899
+
+# SSHD
+EXPOSE 22
+
+# QuorumPeerMain (Zookeeper)
+EXPOSE 2181 39534
+
+# NameNode (HDFS)
+EXPOSE 8020 50070 9000
+
+# DataNode (HDFS)
+EXPOSE 50010 50020 50075
+
+# SecondaryNameNode (HDFS)
+EXPOSE 50090
+
+# Trackers
+EXPOSE 50030 50060
+
+# HBase defaults: master RPC 60000 and web UI 60010, region server RPC 60020
+# and web UI 60030
+EXPOSE 60000 60010 60020 60030
+
+# Thrift
+EXPOSE 9090 9095
+
+
+# Install the service start script and make it executable
+ADD config/run-services.sh /root/run-services.sh
+RUN chmod +x /root/run-services.sh
+
+# Default container command: run the start script (exec form, no shell wrapper)
+CMD ["/root/run-services.sh"]
Added: nutch/branches/2.x/docker/hbase/README.md
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/README.md?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/README.md (added)
+++ nutch/branches/2.x/docker/hbase/README.md Tue Feb 3 01:29:23 2015
@@ -0,0 +1,59 @@
+# Nutch Dockerfile #
+
+This directory contains the Dockerfile for [Nutch](http://nutch.apache.org), used for [Docker](https://www.docker.com/)'s [automated build](https://registry.hub.docker.com/u/dockerfile/elasticsearch/) published to the [Hub Registry](https://registry.hub.docker.com/).
+
+## What is Nutch?
+
+![Nutch logo](https://wiki.apache.org/nutch/FrontPage?action=AttachFile&do=get&target=nutch_logo_medium.gif "Nutch")
+
+Apache Nutch is a highly extensible and scalable open source web crawler software project.
+
+Nutch can run on a single machine, but gains a lot of its strength from running in a Hadoop cluster.
+
+## Docker Image
+
+The current configuration of this image consists of the following components:
+
+* Hadoop 2.4.0
+* HBase 0.94.14
+* Nutch 2.x
+
+## Base Image
+
+* [stackbrew/ubuntu:saucy](https://registry.hub.docker.com/u/stackbrew/ubuntu/)
+
+
+## Installation
+
+1. Install [Docker](https://www.docker.com/).
+
+2a. Download automated build from public hub registry `docker pull nutch/nutch_with_hbase_hadoop`
+
+2b. Build from files in this directory:
+
+ $(boot2docker shellinit)
+ docker build -t <new name for image> .
+
+## Usage
+
+Start docker
+
+ boot2docker up
+ $(boot2docker shellinit)
+
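+Start a container from the image and open a shell inside it. The first command starts the container in the background and captures its ID; the second follows the service logs on stdout (press Ctrl-C to stop following); the third opens a shell in the running container.
+
+ CONTAINER_ID=$(docker run -d nutch_with_hbase_hadoop)
+ docker logs -f $CONTAINER_ID
+ docker exec -i -t $CONTAINER_ID bash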
+
+
+Nutch is located in /opt/nutch/ and is almost ready to run.
+Review configuration in /opt/nutch/conf/ and you can start crawling.
+
+ echo 'http://nutch.apache.org' > seed.txt
+ /opt/nutch/bin/nutch inject seed.txt
+ /opt/nutch/bin/nutch generate -topN 10 -- this will return batchId
+ /opt/nutch/bin/nutch fetch <batchId>
+ /opt/nutch/bin/nutch parse <batchId>
+ /opt/nutch/bin/nutch updatedb <batchId>
+ [...]
Added: nutch/branches/2.x/docker/hbase/config/bashrc
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/bashrc?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/bashrc (added)
+++ nutch/branches/2.x/docker/hbase/config/bashrc Tue Feb 3 01:29:23 2015
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+export JAVA_HOME=/usr/lib/jvm/java-7-oracle/
+export HADOOP_PREFIX=/opt/hadoop
+export HADOOP_HOME=/opt/hadoop
+export HADOOP_COMMON_HOME=$HADOOP_PREFIX
+export HADOOP_HDFS_HOME=$HADOOP_PREFIX
+export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
+
+export HADOOP_CONF_DIR=$HADOOP_HDFS_HOME/etc/hadoop
+
+export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_PREFIX/lib/native
+export HADOOP_OPTS="-Djava.library.path=$HADOOP_PREFIX/lib -Djava.net.preferIPv4Stack=true"
+
+export PATH=$PATH:$HADOOP_PREFIX/bin:$HADOOP_PREFIX/sbin
+export HBASE_MANAGES_ZK=false
Added: nutch/branches/2.x/docker/hbase/config/core-site.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/core-site.xml?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/core-site.xml (added)
+++ nutch/branches/2.x/docker/hbase/config/core-site.xml Tue Feb 3 01:29:23 2015
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<configuration>
+ <!-- Default filesystem URI: HDFS on localhost, port 9000.
+      fs.defaultFS is the Hadoop 2 name of the deprecated fs.default.name. -->
+ <property>
+ <name>fs.defaultFS</name>
+ <value>hdfs://localhost:9000</value>
+ </property>
+ <!-- Base directory for Hadoop's temporary files, under hduser's home -->
+ <property>
+ <name>hadoop.tmp.dir</name>
+ <value>/home/hduser/data/hadoop/tmp</value>
+ </property>
+ <!-- Checkpoint storage directory. dfs.namenode.checkpoint.dir is the
+      Hadoop 2 name of the deprecated fs.checkpoint.dir. -->
+ <property>
+ <name>dfs.namenode.checkpoint.dir</name>
+ <value>/home/hduser/data/hadoop/snn</value>
+ </property>
+</configuration>
Added: nutch/branches/2.x/docker/hbase/config/hbase-site.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/hbase-site.xml?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/hbase-site.xml (added)
+++ nutch/branches/2.x/docker/hbase/config/hbase-site.xml Tue Feb 3 01:29:23 2015
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<configuration>
+ <!-- HBase stores its data in HDFS, on the filesystem at localhost:9000 -->
+ <property>
+ <name>hbase.rootdir</name>
+ <value>hdfs://localhost:9000/hbase</value>
+ </property>
+ <!-- Run HBase in distributed mode -->
+ <property>
+ <name>hbase.cluster.distributed</name>
+ <value>true</value>
+ </property>
+ <!-- ZooKeeper quorum host -->
+ <property>
+ <name>hbase.zookeeper.quorum</name>
+ <value>localhost</value>
+ </property>
+ <!-- Replication factor of 1 -->
+ <property>
+ <name>dfs.replication</name>
+ <value>1</value>
+ </property>
+</configuration>
Added: nutch/branches/2.x/docker/hbase/config/hdfs-site.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/hdfs-site.xml?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/hdfs-site.xml (added)
+++ nutch/branches/2.x/docker/hbase/config/hdfs-site.xml Tue Feb 3 01:29:23 2015
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<configuration>
+ <!-- Replication factor of 1 -->
+ <property>
+ <name>dfs.replication</name>
+ <value>1</value>
+ </property>
+ <!-- NameNode metadata directory. dfs.namenode.name.dir is the Hadoop 2
+      name of the deprecated dfs.name.dir. -->
+ <property>
+ <name>dfs.namenode.name.dir</name>
+ <value>/home/hduser/data/hadoop/nn</value>
+ </property>
+ <!-- DataNode block storage directory. dfs.datanode.data.dir is the
+      Hadoop 2 name of the deprecated dfs.data.dir. -->
+ <property>
+ <name>dfs.datanode.data.dir</name>
+ <value>/home/hduser/data/hadoop/dn</value>
+ </property>
+ <!-- Group whose members are HDFS superusers. dfs.permissions.superusergroup
+      is the Hadoop 2 name of the deprecated dfs.permissions.supergroup. -->
+ <property>
+ <name>dfs.permissions.superusergroup</name>
+ <value>hadoop</value>
+ </property>
+</configuration>
Added: nutch/branches/2.x/docker/hbase/config/mapred-site.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/mapred-site.xml?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/mapred-site.xml (added)
+++ nutch/branches/2.x/docker/hbase/config/mapred-site.xml Tue Feb 3 01:29:23 2015
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<configuration>
+ <!-- Run MapReduce jobs on YARN -->
+ <property>
+ <name>mapreduce.framework.name</name>
+ <value>yarn</value>
+ </property>
+</configuration>
Added: nutch/branches/2.x/docker/hbase/config/nutch-site.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/nutch-site.xml?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/nutch-site.xml (added)
+++ nutch/branches/2.x/docker/hbase/config/nutch-site.xml Tue Feb 3 01:29:23 2015
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<configuration>
+ <!-- Crawler agent name. NOTE(review): "your-crawler-name" looks like a
+      placeholder to be replaced before crawling - confirm. -->
+ <property>
+ <name>http.agent.name</name>
+ <value>your-crawler-name</value>
+ </property>
+ <!-- Store crawl data through Gora's HBase store -->
+ <property>
+ <name>storage.data.store.class</name>
+ <value>org.apache.gora.hbase.store.HBaseStore</value>
+ <description>Default class for storing data</description>
+ </property>
+</configuration>
Added: nutch/branches/2.x/docker/hbase/config/run-services.sh
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/run-services.sh?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/run-services.sh (added)
+++ nutch/branches/2.x/docker/hbase/config/run-services.sh Tue Feb 3 01:29:23 2015
@@ -0,0 +1,50 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Trace every command so the container's stdout shows start-up progress
+set -x
+
+# append ssh public key to authorized_keys file
+#echo $AUTHORIZED_SSH_PUBLIC_KEY >> /home/hduser/.ssh/authorized_keys
+
+# Create the HDFS storage directories, then format the NameNode only when it
+# has not been formatted yet. A formatted name directory contains "current/";
+# formatting again would ask for confirmation and could wipe existing data.
+if su -l -c 'mkdir -p /home/hduser/data/hadoop/nn /home/hduser/data/hadoop/dn' hduser \
+   && [ ! -d /home/hduser/data/hadoop/nn/current ]; then
+    su -l -c '/opt/hadoop/bin/hadoop namenode -format' hduser
+fi
+
+# start ssh daemon
+service ssh start
+
+# start zookeeper (HBase is configured to use this external instance)
+service zookeeper start
+
+# clear hadoop logs
+rm -fr /opt/hadoop/logs/*
+
+# start HDFS
+su -l -c '/opt/hadoop/sbin/start-dfs.sh' hduser
+
+# start YARN
+su -l -c '/opt/hadoop/sbin/start-yarn.sh' hduser
+
+# start HBASE
+su -l -c '/opt/hbase/bin/start-hbase.sh' hduser
+
+# start HBASE thrift in the background, logging to the HBase logs directory
+su -l -c '/opt/hbase/bin/hbase thrift start > /opt/hbase/logs/thrift.log 2>&1 &' hduser
+
+sleep 1
+
+# Follow the service logs; this keeps the script, and so the container, in
+# the foreground
+tail -n 1000 -f /opt/hadoop/logs/*.log /opt/hbase/logs/*.log /opt/nutch/logs/*.log
Added: nutch/branches/2.x/docker/hbase/config/ssh-config
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/ssh-config?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/ssh-config (added)
+++ nutch/branches/2.x/docker/hbase/config/ssh-config Tue Feb 3 01:29:23 2015
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Applies to every host: turn off strict host key checking
+Host *
+ StrictHostKeyChecking no
Added: nutch/branches/2.x/docker/hbase/config/yarn-site.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/docker/hbase/config/yarn-site.xml?rev=1656615&view=auto
==============================================================================
--- nutch/branches/2.x/docker/hbase/config/yarn-site.xml (added)
+++ nutch/branches/2.x/docker/hbase/config/yarn-site.xml Tue Feb 3 01:29:23 2015
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<configuration>
+ <!-- Auxiliary service run by the NodeManager: the MapReduce shuffle -->
+ <property>
+ <name>yarn.nodemanager.aux-services</name>
+ <value>mapreduce_shuffle</value>
+ </property>
+
+ <!-- Classpath for YARN applications: the Hadoop config directory plus the
+      common, hdfs, mapreduce and yarn jars under /opt/hadoop/share/hadoop -->
+ <property>
+ <name>yarn.application.classpath</name>
+ <value>/opt/hadoop/etc/hadoop,/opt/hadoop/share/hadoop/common/*,/opt/hadoop/share/hadoop/common/lib/*,/opt/hadoop/share/hadoop/hdfs/*,/opt/hadoop/share/hadoop/hdfs/lib/*,/opt/hadoop/share/hadoop/mapreduce/*,/opt/hadoop/share/hadoop/mapreduce/lib/*,/opt/hadoop/share/hadoop/yarn/*,/opt/hadoop/share/hadoop/yarn/lib/*</value>
+ </property>
+
+ <!-- Keep finished applications' files for 600 seconds to allow debugging;
+      the "see below" references in the description point at properties that
+      are not set in this file -->
+ <property>
+ <description>
+ Number of seconds after an application finishes before the nodemanager's
+ DeletionService will delete the application's localized file directory
+ and log directory.
+
+ To diagnose Yarn application problems, set this property's value large
+ enough (for example, to 600 = 10 minutes) to permit examination of these
+ directories. After changing the property's value, you must restart the
+ nodemanager in order for it to have an effect.
+
+ The roots of Yarn applications' work directories is configurable with
+ the yarn.nodemanager.local-dirs property (see below), and the roots
+ of the Yarn applications' log directories is configurable with the
+ yarn.nodemanager.log-dirs property (see also below).
+ </description>
+ <name>yarn.nodemanager.delete.debug-delay-sec</name>
+ <value>600</value>
+ </property>
+
+</configuration>