Monday, 21 September 2015

Hadoop Setup & Installation

# How to set up VirtualBox with Ubuntu for Hadoop
http://www.wikihow.com/Install-Ubuntu-on-VirtualBox

## Install Oracle Java 8; the installer places it under /usr/lib/jvm/java-8-oracle
sudo apt-get install oracle-java8-installer

# Create a dedicated group and user for Hadoop
sudo addgroup hadoop
sudo adduser --ingroup hadoop hduser

# or, on RHEL/CentOS:
groupadd hadoop
useradd -G hadoop hduser
passwd hduser
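
# Quick sanity check: id should show hduser as a member of the hadoop group
id hduser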

# Installing SSH
sudo apt-get install openssh-server
# or, on RHEL/CentOS:
yum -y install openssh-server openssh-clients
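
# Optional sanity check that the SSH daemon is running
# (the service is named ssh on Ubuntu and sshd on RHEL/CentOS)
sudo service ssh status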


# Install vim, an updated vi editor
apt-get install vim

# Configuring SSH
# First log in as hduser (from now on, use only the hduser account for the remaining steps)
sudo su hduser

# Generate ssh key for hduser account
ssh-keygen -t rsa -P ""

## Append hduser's id_rsa.pub to authorized_keys, then verify passwordless login
cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
ssh localhost
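
# If ssh localhost still prompts for a password, the usual culprit is
# loose permissions on the key files; tighten them and retry
chmod 700 $HOME/.ssh
chmod 600 $HOME/.ssh/authorized_keys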

# SSH reference
https://help.ubuntu.com/lts/serverguide/openssh-server.html

# Create folder /kapil (absolute, to match the paths used below)
sudo mkdir /kapil

# Move to /kapil
cd /kapil

# wget the Hadoop tarball from the Apache mirror below
# (pick the desired hadoop-x.y.z.tar.gz from the listing; this guide assumes 2.2.0)
http://apache.claz.org/hadoop/common/

# Download it into /kapil, untar it, and rename the folder to match the paths below
tar -xzvf hadoop-2.2.0.tar.gz
mv hadoop-2.2.0 hadoop

# Give hduser ownership of the Hadoop folder
sudo chown -R hduser /kapil/hadoop

## Create Hadoop temp directories for Namenode and Datanode
sudo mkdir -p /kapil/hadoop_tmp/hdfs/namenode
sudo mkdir -p /kapil/hadoop_tmp/hdfs/datanode

## Again assign ownership of this Hadoop temp folder to Hadoop user
sudo chown hduser -R /kapil/hadoop_tmp/

## User profile : Update $HOME/.bashrc
vi ~/.bashrc

## Update hduser configuration file by appending the
## following environment variables at the end of this file.
# -- HADOOP ENVIRONMENT VARIABLES START -- #
export JAVA_HOME=/usr/lib/jvm/java-8-oracle
export HADOOP_HOME=/kapil/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
# -- HADOOP ENVIRONMENT VARIABLES END -- #
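
## Reload the profile and confirm Hadoop is on the PATH
source ~/.bashrc
hadoop version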

## Edit Configuration file : vi /kapil/hadoop/etc/hadoop/hadoop-env.sh
## Change the section below (the path must match the Java install above)
# The java implementation to use.
export JAVA_HOME=/usr/lib/jvm/java-8-oracle


## Edit the remaining configuration files under /kapil/hadoop/etc/hadoop:
## core-site.xml, hdfs-site.xml, mapred-site.xml, and yarn-site.xml
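
A minimal single-node sketch of these four files follows, assuming the
HDFS directories created above and stock Hadoop 2.x ports; adjust the
values to match your environment.

## /kapil/hadoop/etc/hadoop/core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
</configuration>

## /kapil/hadoop/etc/hadoop/hdfs-site.xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/kapil/hadoop_tmp/hdfs/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/kapil/hadoop_tmp/hdfs/datanode</value>
  </property>
</configuration>

## /kapil/hadoop/etc/hadoop/mapred-site.xml
## (create it from the template first: cp mapred-site.xml.template mapred-site.xml)
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>

## /kapil/hadoop/etc/hadoop/yarn-site.xml
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>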


## Format Namenode by executing below command
hdfs namenode -format

## Start all Hadoop daemons
# Start hdfs daemons
start-dfs.sh

# Start MapReduce daemons:
start-yarn.sh

# Instead of the two commands above you can also use (deprecated in Hadoop 2.x)
start-all.sh

## Verify the Hadoop daemons with the JDK's jps tool:
jps
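
# On a healthy single-node setup, jps should list roughly these
# daemons (process IDs will differ):
# NameNode, DataNode, SecondaryNameNode, ResourceManager, NodeManager, Jps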

## Web UIs for Hadoop administrators
Open your default browser and visit the following links.
# For ResourceManager
http://localhost:8088

# For NameNode
http://localhost:50070

# Start the job history server to browse completed jobs
mr-jobhistory-daemon.sh start historyserver
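
# Once started, the history server web UI is served (by default) at
http://localhost:19888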

# Run the bundled pi estimator example
cd /kapil/hadoop
hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar pi 2 5

# Run the bundled wordcount example
hadoop fs -copyFromLocal /home/hduser/input.txt input
hadoop fs -ls input
hadoop fs -cat input/input.txt
bin/yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar wordcount input output
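
# After the job finishes, inspect the results; part-r-00000 is the
# usual output file name for a single-reducer job
hadoop fs -ls output
hadoop fs -cat output/part-r-00000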

# Other useful commands
export JAVA_HOME=/usr/lib/jvm/java-8-oracle
export PATH=${JAVA_HOME}/bin:${PATH}
export HADOOP_CLASSPATH=${JAVA_HOME}/lib/tools.jar
export HADOOP_HOME=/kapil/hadoop
export PATH=$PATH:$HADOOP_HOME/bin

# List all users
cut -d: -f1 /etc/passwd
# List all groups
cut -d: -f1 /etc/group
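
# Alternatively, getent queries the same databases and also covers
# non-local accounts, e.g. to inspect the hadoop group:
getent group hadoop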

# Remove a user
userdel userName

# Grant hduser sudo privileges
sudo usermod -a -G sudo hduser

Hadoop/HDFS utility commands

# List files
hadoop fs -ls
# List files recursively
hadoop fs -lsr

# Disk space used by files
hadoop fs -du

# Print Summary of disk usage
hadoop fs -dus

# Move file or dir. location
hadoop fs -mv

# Copy file or dir.
hadoop fs -cp

# Removes file or dir.
hadoop fs -rm

# Removes file or dir. recursively
hadoop fs -rmr

# Copy file from local dir to hdfs dir.
hadoop fs -put
hadoop fs -copyFromLocal

# Move file or dir. from local to hdfs (removes the local copy)
hadoop fs -moveFromLocal

# Copy file from hdfs to local dir.
hadoop fs -get [-crc]
hadoop fs -copyToLocal

# Move file from hdfs to local dir.
hadoop fs -moveToLocal

# Merge & copy file from hdfs to local dir.
hadoop fs -getmerge

# Display the file contents
hadoop fs -cat

# Display the file, decoding it to text (e.g. compressed or sequence files)
hadoop fs -text

# Create dir.
hadoop fs -mkdir

# Sets target replication factor for files
hadoop fs -setrep [-R] [-w] rep
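# e.g. set replication to 3 and wait for it to finish (path is illustrative)
hadoop fs -setrep -w 3 input/input.txt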

# Create an empty (zero-length) file
hadoop fs -touchz

# Exit code 0 if the path exists (-e), has zero length (-z), or is a directory (-d); otherwise 1
hadoop fs -test -[ezd]
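# e.g. branch on the exit code (path is illustrative)
hadoop fs -test -e input/input.txt && echo "input.txt exists"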

# Print stats about the path in the given format: file size in blocks (%b), filename (%n), block size (%o), replication (%r), and modification date (%y, %Y)
hadoop fs -stat [format]
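# e.g. print name, replication, and modification date (path is illustrative)
hadoop fs -stat "%n %r %y" input/input.txt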

# Show the last 1KB of the file on stdout (-f keeps following appended data)
hadoop fs -tail [-f]

# Set permission on file & dir.
hadoop fs -chmod [-R] mode

# Change the owner (and optionally the group) of files
hadoop fs -chown [-R] [owner][:[group]]

# Set the owning group
hadoop fs -chgrp [-R] group

# Print help for a command
hadoop fs -help
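# e.g. show the usage of a single command
hadoop fs -help ls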