# Run the WordCount MapReduce example with YARN

# Download hadoop-2.6.3.tar.gz from an Apache mirror first.
# (Original used a C-style // comment, which is a syntax error in shell.)
tar -xvf ~/Downloads/hadoop-2.6.3.tar.gz -C ~/opt/
cd ~/opt/hadoop-2.6.3

# Passwordless SSH to localhost is required by start-dfs.sh / start-yarn.sh.
# -N '' and -f make key generation non-interactive; a bare `ssh-keygen`
# prompts for a passphrase, which would defeat passwordless login.
[ -f ~/.ssh/id_rsa ] || ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys   # sshd rejects group/world-writable authorized_keys

# Local directories backing HDFS metadata/data and the YARN NodeManager.
# NOTE(review): these live directly under / — run as root or adjust the paths.
mkdir -p /disk1/hdfs/{name,data,namesecondary}
mkdir -p /disk2/hdfs/{data,namesecondary}
mkdir -p /remote/hdfs/name
mkdir -p /disk1/nm-local-dir /disk2/nm-local-dir

# Write core-site.xml non-interactively (the original transcript opened vim
# and pasted the XML, which is not executable as a script).
cat > etc/hadoop/core-site.xml <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
  <!-- Default filesystem URI; relative HDFS paths resolve against this. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost/</value>
  </property>
</configuration>
EOF

# Write hdfs-site.xml non-interactively (replaces the interactive vim edit).
cat > etc/hadoop/hdfs-site.xml <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
  <!-- Namenode metadata is mirrored to every directory in the comma list. -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/disk1/hdfs/name,/remote/hdfs/name</value>
  </property>
  <!-- Datanode spreads block storage across these disks. -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/disk1/hdfs/data,/disk2/hdfs/data</value>
  </property>
  <property>
    <name>dfs.namenode.checkpoint.dir</name>
    <value>/disk1/hdfs/namesecondary,/disk2/hdfs/namesecondary</value>
  </property>
</configuration>
EOF

# Write yarn-site.xml non-interactively (replaces the interactive vim edit).
cat > etc/hadoop/yarn-site.xml <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>localhost</value>
  </property>
  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/disk1/nm-local-dir,/disk2/nm-local-dir</value>
  </property>
  <!-- Required for the MapReduce shuffle phase under YARN. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!-- The original file declared yarn.nodemanager.resource.memory-mb twice
       (16384 and 9830); Hadoop keeps the last definition, so only the
       effective value 9830 is retained here. -->
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>9830</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>16</value>
  </property>
  <!-- CapacityScheduler setting (usually lives in capacity-scheduler.xml).
       It is a fraction in (0, 1]; the original value 100 was out of range —
       1.0 expresses the intended "100%". -->
  <property>
    <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
    <value>1.0</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>1228</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>9830</value>
  </property>
</configuration>
EOF

cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml

# Overwrite with the full configuration (the cp above keeps the original
# transcript's step; the heredoc replaces the interactive vim edit).
cat > etc/hadoop/mapred-site.xml <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <!-- Container sizes: the JVM heap (-Xmx983m) is ~80% of the 1228 MB
       container so there is headroom for non-heap memory. -->
  <property>
    <name>yarn.app.mapreduce.am.resource.mb</name>
    <value>1228</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.command-opts</name>
    <value>-Xmx983m</value>
  </property>
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>1228</value>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>1228</value>
  </property>
  <property>
    <name>mapreduce.map.java.opts</name>
    <value>-Xmx983m</value>
  </property>
  <property>
    <name>mapreduce.reduce.java.opts</name>
    <value>-Xmx983m</value>
  </property>
</configuration>
EOF

# Set JAVA_HOME non-interactively: appending works because the last
# assignment in hadoop-env.sh wins (replaces the interactive vim edit).
echo 'export JAVA_HOME=/opt/jdk' >> etc/hadoop/hadoop-env.sh

# Format the namenode and start the HDFS and YARN daemons.
./bin/hdfs namenode -format mycluster
./sbin/start-dfs.sh
./sbin/start-yarn.sh

# `hdfs dfs` replaces the deprecated `hadoop dfs`.
./bin/hdfs dfs -mkdir -p hdfs://localhost/wc/in
# Straight quotes — the original's typographic “ ” quotes would be written
# into the data verbatim.
echo 'Hello world, Hello' > /tmp/input.txt
# Fixed typo: the original copied /tmp/inout.txt, which was never created.
./bin/hdfs dfs -copyFromLocal /tmp/input.txt hdfs://localhost/wc/in
./bin/hadoop jar ./share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.3.jar wordcount hdfs://localhost/wc/in hdfs://localhost/wc/out

# ----------------------------------------------------------------------

# Install Apache Tez 0.7.0
#
# Prerequisites:
#   - JDK 1.7
#   - Hadoop 2.7
#   - Maven 3.3
#   - npm and bower

wget https://github.com/google/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.gz

tar -xvf protobuf-2.5.0.tar.gz

cd protobuf-2.5.0/

# -y keeps apt-get non-interactive when run from a script.
sudo apt-get install -y g++
# Build as a regular user — running configure/make under sudo leaves
# root-owned build artifacts; only install and ldconfig need root.
./configure
make
make check
sudo make install
sudo ldconfig
cd ../

wget http://apache.lauf-forum.at/tez/0.7.0/apache-tez-0.7.0-src.tar.gz

tar -xvf apache-tez-0.7.0-src.tar.gz

cd apache-tez-0.7.0-src

# Verify the installed Hadoop version; expected output: "Hadoop 2.7.1".
# (The original transcript pasted that output as a bare line, which would
# itself be executed — and fail — as a command.)
"$HADOOP_HOME/bin/hadoop" version

# Update hadoop.version in pom.xml to 2.7.1 and protobuf.version to 2.5.0
# before building.
mvn clean package -DskipTests=true -Dmaven.javadoc.skip=true

"$HADOOP_HOME/sbin/start-dfs.sh"

# `hdfs dfs` replaces the deprecated `hadoop dfs`.
"$HADOOP_HOME/bin/hdfs" dfs -mkdir -p hdfs://localhost/apps/tez

"$HADOOP_HOME/bin/hdfs" dfs -copyFromLocal tez-dist/target/tez-0.7.0.tar.gz hdfs://localhost/apps/tez

mkdir -p conf
# The original blog post used typographic quotes (” ″) in the XML
# declaration, which makes the file invalid XML; replaced with straight
# quotes and written via heredoc instead of an interactive vim edit.
cat > conf/tez-site.xml <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- HDFS location of the Tez tarball uploaded above. -->
  <property>
    <name>tez.lib.uris</name>
    <value>${fs.defaultFS}/apps/tez/tez-0.7.0.tar.gz</value>
  </property>
</configuration>
EOF

# Tez environment; expansions are quoted so paths containing spaces survive.
export TEZ_HOME="$(pwd)"
export TEZ_JARS="$TEZ_HOME/tez-dist/target/tez-0.7.0"
export TEZ_CONF_DIR="$TEZ_HOME/conf"
# The literal '*' wildcards are expanded by the JVM classpath loader, not the
# shell; ${HADOOP_CLASSPATH:-} guards against an unset variable under `set -u`.
export HADOOP_CLASSPATH="$TEZ_CONF_DIR:$TEZ_JARS/*:$TEZ_JARS/lib/*:${HADOOP_CLASSPATH:-}"

# Create the local input/output dirs and the sample input file.
# (The original opened vim interactively and pasted "Hello World!"; this is
# the scriptable equivalent. -p makes the mkdirs idempotent.)
mkdir -p in out
printf '%s\n' 'Hello World!' > in/test.txt

# Remove the slf4j-log4j binding shipped with Tez so it does not clash with
# the binding already on Hadoop's classpath. -f keeps this idempotent and
# -- protects against option-like paths.
rm -f -- "$TEZ_HOME/tez-dist/target/tez-0.7.0/lib/slf4j-log4j12-1.7.5.jar"

# Restart the daemons so the new classpath/config is picked up.
"$HADOOP_HOME/sbin/stop-dfs.sh"
"$HADOOP_HOME/sbin/start-dfs.sh"
"$HADOOP_HOME/sbin/stop-yarn.sh"
"$HADOOP_HOME/sbin/start-yarn.sh"

# `hdfs dfs` replaces the deprecated `hadoop dfs`.
"$HADOOP_HOME/bin/hdfs" dfs -mkdir -p hdfs://localhost/apps/in
"$HADOOP_HOME/bin/hdfs" dfs -copyFromLocal in/test.txt hdfs://localhost/apps/in

"$HADOOP_HOME/bin/hadoop" jar "$TEZ_HOME/tez-examples/target/tez-examples-0.7.0.jar" orderedwordcount hdfs://localhost/apps/in hdfs://localhost/apps/out