Posted to user@hadoop.apache.org by Juliano Atanazio <ju...@gmail.com> on 2016/06/25 16:32:45 UTC

Manual Installation: CentOS 7 + SystemD + Unit Files + Hadoop at boot

Hi.

I'm a novice with Hadoop.
I'm trying to install Hadoop (single node) manually on CentOS 7, using
systemd unit files to start Hadoop at boot.
But when I start the DFS service (systemctl start dfs), it starts and then
dies a few seconds later...
I have googled for days and found nothing about systemd...
Below are the steps I have done (with the OpenJDK Java environment already
installed):

Excuse my bad English :(




=================================================================================================
# yum erase NetworkManager{,-{libnm,tui,wifi}}

# groupadd -r hdfs

# useradd -r -g hdfs -d /usr/local/hdfs -s /bin/bash -k /etc/skel -c 'HDFS
System User' -m hdfs

# mkdir /usr/local/hdfs/data

# yum install openssh-server openssh-clients rsync

# cat << EOF >> ~hdfs/.bashrc

export HADOOP_COMMON_HOME='/usr/local/hdfs/hadoop'
export HADOOP_MAPRED_HOME="\${HADOOP_COMMON_HOME}"
export HADOOP_HDFS_HOME="\${HADOOP_COMMON_HOME}"
export YARN_HOME="\${HADOOP_COMMON_HOME}"
export JAVA_HOME='/usr/local/openjdk'
export JRE_HOME="\${JAVA_HOME}/jre"
export PATH="\${PATH}:\${HADOOP_COMMON_HOME}/bin:\${HADOOP_COMMON_HOME}/sbin:\${JAVA_HOME}/bin"
EOF
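
As a quick sanity check, the \$ escapes above are meant to keep the variable references literal while root writes .bashrc, so a login shell for the hdfs user should print the real path (command and expected output are an assumption, not taken from the steps above):

# su - hdfs -c 'echo $HADOOP_COMMON_HOME'
/usr/local/hdfs/hadoop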

# chown -R hdfs: ~hdfs/

# su - hdfs

$ tar xf /usr/src/hadoop-2.7.2.tar.gz

$ mv hadoop-2.7.2/ hadoop/

$ rm -f ${HADOOP_COMMON_HOME}/{{,s}bin,etc/hadoop,libexec}/*.cmd

$ cat << EOF > ${HADOOP_COMMON_HOME}/etc/hadoop/core-site.xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://0.0.0.0:9000</value>
        <description>NameNode URI</description>
    </property>
</configuration>
EOF
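
Once logged in as hdfs with the environment above, hdfs getconf gives a quick way to confirm this file is the one being read; the expected value is the one just configured:

$ hdfs getconf -confKey fs.defaultFS
hdfs://0.0.0.0:9000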


$ cat << EOF > ${HADOOP_COMMON_HOME}/etc/hadoop/yarn-site.xml
<configuration>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>hadoop</value>
        <description>The hostname of the ResourceManager</description>
    </property>

    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
        <description>shuffle service for MapReduce</description>
    </property>
</configuration>
EOF


$ cat << EOF > ${HADOOP_COMMON_HOME}/etc/hadoop/hdfs-site.xml
<configuration>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:///usr/local/hdfs/data/data</value>
        <description>DataNode directory for storing data chunks.</description>
    </property>

    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:///usr/local/hdfs/data/name</value>
        <description>NameNode directory for namespace and transaction logs storage.</description>
    </property>

    <property>
        <name>dfs.replication</name>
        <value>3</value>
        <description>Number of replicas for each block.</description>
    </property>

    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
        <description>Enable or disable WebHDFS. Defaults to false.</description>
    </property>

</configuration>
EOF

$ cat << EOF > ${HADOOP_COMMON_HOME}/etc/hadoop/mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
        <description>Execution framework.</description>
    </property>
</configuration>
EOF

$ ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa

$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys

$ chmod 0600 ~/.ssh/authorized_keys

$ ssh localhost 'echo test'
test

$ hdfs namenode -format
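
If the format succeeds, the name directory configured in hdfs-site.xml should now be populated; the listing below is roughly what to expect (exact file names may vary by version):

$ ls /usr/local/hdfs/data/name/current/
fsimage_0000000000000000000  fsimage_0000000000000000000.md5  seen_txid  VERSION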

$ start-dfs.sh

http://localhost:50070/
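
The same check can be done from the shell; assuming the default web UI port, a healthy NameNode should answer with HTTP 200:

$ curl -s -o /dev/null -w '%{http_code}\n' http://localhost:50070/
200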

$ hdfs dfs -mkdir -p /user/${USER}


$ cat << EOF > /tmp/foo.txt
line 1
line 2
line 3
EOF

$ hdfs dfs -put /tmp/foo.txt /user/hdfs/

$ hdfs dfs -cat /user/hdfs/foo.txt

$ jps
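
For comparison, on a healthy single-node setup started with start-dfs.sh, jps typically lists something like the following (PIDs will differ):

12001 NameNode
12102 DataNode
12234 SecondaryNameNode
12456 Jps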


# cat << EOF > /lib/systemd/system/hadoop-namenode.service
[Unit]
Description=DFS
After=syslog.target network.target
DefaultDependencies=true

[Service]
Type=simple
User=hdfs
Group=hdfs
Environment=YARN_HOME=/usr/local/hdfs/hadoop
Environment=HADOOP_HDFS_HOME=/usr/local/hdfs/hadoop
Environment=HADOOP_COMMON_HOME=/usr/local/hdfs/hadoop
Environment=JAVA_HOME=/usr/local/openjdk
Environment=HADOOP_MAPRED_HOME=/usr/local/hdfs/hadoop
OOMScoreAdjust=-1000
ExecStart=/usr/local/hdfs/hadoop/sbin/hadoop-daemon.sh start namenode
ExecStop=/usr/local/hdfs/hadoop/sbin/hadoop-daemon.sh stop namenode
TimeoutSec=300
[Install]
WantedBy=multi-user.target
EOF

# systemctl enable dfs

# systemctl start dfs
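
When the unit dies shortly after starting, systemd usually records why; assuming the unit file shown above (hadoop-namenode.service), the usual first checks are:

# systemctl status hadoop-namenode.service
# journalctl -u hadoop-namenode.service --no-pager | tail -n 50

Worth noting: hadoop-daemon.sh forks a background daemon and exits, so with Type=simple systemd may consider the unit finished as soon as the script returns; Type=forking with a PIDFile, or keeping the process in the foreground, is the usual way around that.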

Re: Manual Installation: CentOS 7 + SystemD + Unit Files + Hadoop at boot

Posted by Rohan Rajeevan <ro...@gmail.com>.
Can you post the namenode logs?
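
With the layout above, hadoop-daemon.sh writes its logs under the distribution's logs directory by default; the exact file names depend on the user and hostname, so the paths below are only an assumption:

$ tail -n 200 /usr/local/hdfs/hadoop/logs/hadoop-hdfs-namenode-$(hostname).log
$ cat /usr/local/hdfs/hadoop/logs/hadoop-hdfs-namenode-$(hostname).out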
