Monday, 21 September 2015

Hadoop Setup & Installation

# How to Setup virtual box for hadoop
http://www.wikihow.com/Install-Ubuntu-on-VirtualBox

## It will install java source in your machine at /usr/lib/jvm/java-8-oracle
sudo apt-get install oracle-java8-installer

# add user in group
sudo addgroup hadoop
sudo adduser --ingroup hadoop hduser
                           or
# Create user for hadoop
groupadd hadoop
useradd -G hadoop hduser
passwd hduser

# Installing SSH
sudo apt-get install openssh-server
or
yum -y install openssh-server openssh-clients


# For updated vi editor
apt-get install vim

# Configuring SSH
# First login with hduser (and from now use only hduser account for further steps)
sudo su hduser

# Generate ssh key for hduser account
ssh-keygen -t rsa -P ""

## Copy id_rsa.pub to authorized keys from hduser
cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
ssh localhost

# Example ssh reference

https://help.ubuntu.com/lts/serverguide/openssh-server.html

# Create folder kapil
mkdir kapil

# Move to kapil
cd kapil

# wget the hadoop tarball from the Apache mirror below (substitute the version you need)
http://apache.claz.org/hadoop/common/hadoop-2.2.0/hadoop-2.2.0.tar.gz

# Download into the hadoop folder and untar the archive
tar -xzvf hadoop-2.2.0.tar.gz

# Give ownership of the hadoop directory to the user
sudo chown hduser -R /kapil/hadoop

## Create Hadoop temp directories for Namenode and Datanode
sudo mkdir -p /kapil/hadoop_tmp/hdfs/namenode
sudo mkdir -p /kapil/hadoop_tmp/hdfs/datanode

## Again assign ownership of this Hadoop temp folder to Hadoop user
sudo chown hduser -R /kapil/hadoop_tmp/

## User profile : Update $HOME/.bashrc
vi ~/.bashrc

## Update hduser configuration file by appending the
## following environment variables at the end of this file.
# -- HADOOP ENVIRONMENT VARIABLES START -- #
export JAVA_HOME=/usr/lib/jvm/java-8-oracle
export HADOOP_HOME=/kapil/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
# -- HADOOP ENVIRONMENT VARIABLES END -- #

## Edit Configuration file : vi /kapil/hadoop/etc/hadoop/hadoop-env.sh
## Change below section
# The java implementation to use (must match the JDK installed earlier).
export JAVA_HOME=/usr/lib/jvm/java-8-oracle


## Edit Configuration file :


## Format Namenode by executing below command
hdfs namenode -format

## Start all Hadoop daemons
# Start hdfs daemons
start-dfs.sh

# Start MapReduce daemons:
start-yarn.sh

# Instead both of these above command you can also use
start-all.sh

## Verify the Hadoop daemons are running with the JVM process status tool:
jps

## Hadoop administrators.
Open your default browser and visit the following links.
# For ResourceManager
http://localhost:8088

# For NameNode
http://localhost:50070

# See the job history on server
mr-jobhistory-daemon.sh start historyserver

# Run the built-in example 1 with the commands below
cd /usr/local/hadoop
hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar pi 2 5

# Run the built-in example 2 with the commands below
 hadoop fs -copyFromLocal /home/hduser/input.txt input
 hadoop fs -ls input
 hadoop fs -cat input/input.txt
 hadoop fs -cat output
 bin/yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar wordcount input output

# Other useful command
export JAVA_HOME=/usr/lib/jvm/java-8-oracle
export PATH=${JAVA_HOME}/bin:${PATH}
export HADOOP_CLASSPATH=${JAVA_HOME}/lib/tools.jar
export HADOOP_HOME=/kapil/hadoop
export PATH=$PATH:$HADOOP_HOME/bin

# List all users
cut -d: -f1 /etc/passwd
# List all groups
cut -d: -f1 /etc/group

#remove user
userdel userName

sudo usermod -a -G sudo hduser

No comments: