# How to Setup virtual box for hadoop
http://www.wikihow.com/Install-Ubuntu-on-VirtualBox
## It will install java source in your machine at /usr/lib/jvm/java-8-oracle
sudo apt-get install oracle-java8-installer
# add user in group
sudo addgroup hadoop
sudo adduser --ingroup hadoop hduser
or
# Create user for hadoop
groupadd hadoop
useradd -G hadoop hduser
passwd hduser
# Installing SSH
sudo apt-get install openssh-server
or
yum -y install openssh-server openssh-clients
# For updated vi editor
apt-get install vim
# Configuring SSH
# First login with hduser (and from now use only hduser account for further steps)
sudo su hduser
# Generate ssh key for hduser account
ssh-keygen -t rsa -P ""
## Copy id_rsa.pub to authorized keys from hduser
cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
ssh localhost
# Example ssh reference
https://help.ubuntu.com/lts/serverguide/openssh-server.html
# Create folder kapil
mkdir kapil
# Move to kapil
cd kapil
# wget hadoop tar from below directory
http://apache.claz.org/hadoop/common/hadoop-2.2.0/hadoop-2.2.0.tar.gz
# Download in folder hadoop and untar the tar
tar -xzvf hadoop-2.2.0.tar.gz
# Add permission to user
sudo chown hduser -R /kapil/hadoop
## Create Hadoop temp directories for Namenode and Datanode
sudo mkdir -p /kapil/hadoop_tmp/hdfs/namenode
sudo mkdir -p /kapil/hadoop_tmp/hdfs/datanode
## Again assign ownership of this Hadoop temp folder to Hadoop user
sudo chown hduser -R /kapil/hadoop_tmp/
## User profile : Update $HOME/.bashrc
vi ~/.bashrc
## Update hduser configuration file by appending the
## following environment variables at the end of this file.
# -- HADOOP ENVIRONMENT VARIABLES START -- #
export JAVA_HOME=/usr/lib/jvm/jdk1.7.0_51
export HADOOP_HOME=/kapil/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
# -- HADOOP ENVIRONMENT VARIABLES END -- #
## Edit Configuration file : vi /kapil/hadoop/etc/hadoop/hadoop-env.sh
## Change below section
# The java implementation to use.
export JAVA_HOME=/usr/lib/jvm/jdk1.7.0_51
## Edit Configuration files (core-site.xml, hdfs-site.xml, mapred-site.xml, yarn-site.xml)
## NOTE(review): the section below duplicates the setup steps above — likely a paste error;
## the intended configuration-file contents appear to be missing. Confirm against the Hadoop setup guide.
http://www.wikihow.com/Install-Ubuntu-on-VirtualBox
## It will install java source in your machine at /usr/lib/jvm/java-8-oracle
sudo apt-get install oracle-java8-installer
# add user in group
sudo addgroup hadoop
sudo adduser --ingroup hadoop hduser
or
# Create user for hadoop
groupadd hadoop
useradd -G hadoop hduser
passwd hduser
# Installing SSH
sudo apt-get install openssh-server
or
yum -y install openssh-server openssh-clients
# For updated vi editor
apt-get install vim
# Configuring SSH
# First login with hduser (and from now use only hduser account for further steps)
sudo su hduser
# Generate ssh key for hduser account
ssh-keygen -t rsa -P ""
## Copy id_rsa.pub to authorized keys from hduser
cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
ssh localhost
# Example ssh reference
https://help.ubuntu.com/lts/serverguide/openssh-server.html
# Create folder kapil
mkdir kapil
# Move to kapil
cd kapil
# wget hadoop tar from below directory
http://apache.claz.org/hadoop/common/
# Download in folder hadoop and untar the tar
tar -xzvf hadoop-2.2.0.tar.gz
# Add permission to user
sudo chown hduser -R /kapil/hadoop
## Create Hadoop temp directories for Namenode and Datanode
sudo mkdir -p /kapil/hadoop_tmp/hdfs/namenode
sudo mkdir -p /kapil/hadoop_tmp/hdfs/datanode
## Again assign ownership of this Hadoop temp folder to Hadoop user
sudo chown hduser -R /kapil/hadoop_tmp/
## User profile : Update $HOME/.bashrc
vi ~/.bashrc
## Update hduser configuration file by appending the
## following environment variables at the end of this file.
# -- HADOOP ENVIRONMENT VARIABLES START -- #
export JAVA_HOME=/usr/lib/jvm/jdk1.7.0_51
export HADOOP_HOME=/kapil/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
# -- HADOOP ENVIRONMENT VARIABLES END -- #
## Edit Configuration file : vi /kapil/hadoop/etc/hadoop/hadoop-env.sh
## Change below section
# The java implementation to use.
export JAVA_HOME=/usr/lib/jvm/jdk1.7.0_51
## Format the HDFS NameNode (one-time initialization, run before first start)
hdfs namenode -format
## Start all Hadoop daemons
# Start hdfs daemons
start-dfs.sh
# Start MapReduce daemons:
start-yarn.sh
# Instead both of these above command you can also use
start-all.sh
## Verify Hadoop daemons with the jps command (lists running JVM processes):
jps
## Hadoop administrators.
Open your default browser and visit the following links.
# For ResourceManager
http://localhost:8088
# For NameNode
http://localhost:50070
# See the job history on server
mr-jobhistory-daemon.sh start historyserver
# Execute below command to run default example1
cd /kapil/hadoop
hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar pi 2 5
# Execute below command to run default example2
hadoop fs -copyFromLocal /home/hduser/input.txt input
hadoop fs -ls input
hadoop fs -cat input/input.txt
hadoop fs -cat output
bin/yarn jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar wordcount input output
# Other useful command
export JAVA_HOME=/usr/lib/jvm/jdk1.7.0_51
export PATH=${JAVA_HOME}/bin:${PATH}
export HADOOP_CLASSPATH=${JAVA_HOME}/lib/tools.jar
export HADOOP_HOME=/kapil/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
# Check list of users
cut -d: -f1 /etc/passwd
# Check list of groups
cut -d: -f1 /etc/group
#remove user
userdel userName
sudo usermod -a -G sudo hduser