Unit Test for WordCount MapReduce


import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.*;

public class WordCountTest {

    /**
     * Verifies that TokenizerMapper emits ("Hello", 1) when fed the
     * single-word input value "Hello".
     */
    @Test
    public void mapperTest() throws IOException, InterruptedException {
        MapDriver driver = new MapDriver().withMapper(new TokenizerMapper());
        driver.withInput(new IntWritable(), new Text("Hello"))
                .withOutput(new Text("Hello"), new IntWritable(1))
                .runTest();
    }

    /**
     * Verifies that IntSumReducer sums two 1-counts for the key "Hello"
     * into a single ("Hello", 2) output record.
     */
    @Test
    public void reducerTest() throws IOException, InterruptedException {
        ReduceDriver driver = new ReduceDriver().withReducer(new IntSumReducer());
        driver.withInput(new Text("Hello"), Arrays.asList(new IntWritable(1), new IntWritable(1)))
                .withOutput(new Text("Hello"), new IntWritable(2))
                .runTest();
    }
}

Advertisements

Hcatalog + Pig


$DERBY_HOME/bin/startNetworkServer

$HADOOP_HOME/sbin$ ./start-all.sh

$HIVE_HOME/bin/hiveserver2

$HIVE_HOME/bin/hive --service metastore

$HADOOP_HOME/bin/hadoop fs -mkdir hdfs://localhost/hcatalog-example

$HADOOP_HOME/bin/hadoop fs -put /tmp/test-dataset.csv hdfs://localhost/hcatalog-example

$HADOOP_HOME/bin/hadoop fs -cat hdfs://localhost/hcatalog-example/test-dataset.csv | head -n 4

playerID,yearID,gameNum,gameID,teamID,lgID,GP,startingPos
aaronha01,1955,0,NLS195507120,ML1,NL,1,
aaronha01,1956,0,ALS195607100,ML1,NL,1,
aaronha01,1957,0,NLS195707090,ML1,NL,1,9

./hcat -e "CREATE TABLE default.gamedataset (playerID STRING,yearID INT,gameNum INT ,gameID STRING ,teamID STRING ,lgID STRING ,GP INT,startingPos INT) PARTITIONED BY (country STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';"

./hcat -e "alter table default.gamedataset add partition (country='DE') location '/hcatalog-example/'"

vim $PIG_HOME/conf/pig.properties

pig.load.default.statements=/opt/pig-0.15.0/.pigbootup

vim $PIG_HOME/.pigbootup

REGISTER /opt/apache-hive-1.2.1-bin/hcatalog/share/hcatalog/hcatalog-core-1.2.1.jar;
REGISTER /opt/apache-hive-1.2.1-bin/lib/hive-exec-1.2.1.jar;
REGISTER /opt/apache-hive-1.2.1-bin/lib/hive-metastore-1.2.1.jar;

vim $HOME/.bashrc

export PIG_OPTS=-Dhive.metastore.uris=thrift://localhost:9083
export PIG_CLASSPATH=$HCAT_HOME/share/hcatalog/*:$HIVE_HOME/lib/*

$PIG_HOME/bin/pig -useHCatalog

A = load 'default.gamedataset' using org.apache.hive.hcatalog.pig.HCatLoader();
dump A;

Install Apache Hive


tar -xvf db-derby-10.11.1.1-bin.tar.gz
tar -xvf apache-hive-1.2.1-bin.tar.gz
vim ~/.bashrc

export HADOOP_HOME=/opt/hadoop-2.6.2
export DERBY_HOME=/opt/db-derby-10.11.1.1-bin
export PATH=$PATH:$DERBY_HOME/bin
export CLASSPATH=$CLASSPATH:$DERBY_HOME/lib/derby.jar:$DERBY_HOME/lib/derbytools.jar
export HIVE_HOME=/opt/apache-hive-1.2.1-bin

mkdir $DERBY_HOME/data
cp $HIVE_HOME/conf/hive-default.xml.template $HIVE_HOME/conf/hive-site.xml
vim $HIVE_HOME/conf/hive-site.xml

<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:derby://localhost:1527/metastore_db;create=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
</configuration>

vim $HIVE_HOME/conf/jpox.properties

javax.jdo.PersistenceManagerFactoryClass =
org.jpox.PersistenceManagerFactoryImpl
org.jpox.autoCreateSchema = false
org.jpox.validateTables = false
org.jpox.validateColumns = false
org.jpox.validateConstraints = false
org.jpox.storeManagerType = rdbms
org.jpox.autoCreateSchema = true
org.jpox.autoStartMechanismMode = checked
org.jpox.transactionIsolation = read_committed
javax.jdo.option.DetachAllOnCommit = true
javax.jdo.option.NontransactionalRead = true
javax.jdo.option.ConnectionDriverName = org.apache.derby.jdbc.ClientDriver
javax.jdo.option.ConnectionURL = jdbc:derby://localhost:1527/metastore_db;create = true
javax.jdo.option.ConnectionUserName = APP
javax.jdo.option.ConnectionPassword = mine

$DERBY_HOME/bin/startNetworkServer

$HADOOP_HOME/sbin$ ./start-all.sh

$HADOOP_HOME/bin/hadoop fs -mkdir /tmp
$HADOOP_HOME/bin/hadoop fs -mkdir /user/hive/warehouse
$HADOOP_HOME/bin/hadoop fs -chmod g+w /tmp
$HADOOP_HOME/bin/hadoop fs -chmod g+w /user/hive/warehouse

cp $DERBY_HOME/lib/derbyclient.jar $HIVE_HOME/lib/

$HIVE_HOME/bin/hiveserver2

$HIVE_HOME/bin/hive --service metastore

$HIVE_HOME/bin$ ./beeline
Beeline version 1.2.1 by Apache Hive
beeline: !connect jdbc:hive2://localhost:10000/default "hive" ""
Connecting to jdbc:hive2://localhost:10000/default
Connected to: Apache Hive (version 1.2.1)
Driver: Hive JDBC (version 1.2.1)
Transaction isolation: TRANSACTION_REPEATABLE_READ
0: jdbc:hive2://localhost:10000/default:

Install Hadoop Cluster with Docker


name@host:/#wget https://raw.github.com/zettio/weave/master/weave
name@host:/#sudo mv weave /usr/local/bin/
name@host:/#sudo chmod +x /usr/local/bin/weave
name@host:/#weave launch
name@host:/#eval $(weave env)
name@host:/#docker run --name=hnode1 --hostname=hnode1.weave.local -it ubuntu /bin/bash
name@host:/#eval $(weave env)
name@host:/#docker run --name=hnode2 --hostname=hnode2.weave.local -it ubuntu /bin/bash

root@hnode1:~#passwd
Enter new UNIX password:root
root@hnode1:~#sudo apt-get update
root@hnode1:~#apt-get install openssh-server vim rsync
root@hnode1:~#sed -i 's/PermitRootLogin without-password/PermitRootLogin yes/' /etc/ssh/sshd_config
root@hnode1:~#/etc/init.d/ssh start
root@hnode1:~#ssh-keygen

root@hnode2:~#passwd
Enter new UNIX password:root
root@hnode2:~#sudo apt-get update
root@hnode2:~#apt-get install openssh-server vim rsync
root@hnode2:~#sed -i 's/PermitRootLogin without-password/PermitRootLogin yes/' /etc/ssh/sshd_config
root@hnode2:~#/etc/init.d/ssh start
root@hnode2:~#ssh-keygen

root@hnode1:/opt# cat /root/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
root@hnode1:/opt# chmod 700 ~/.ssh
root@hnode1:/opt# chmod 600 ~/.ssh/authorized_keys
root@hnode1:~#scp /root/.ssh/id_rsa.pub root@hnode2:/tmp

root@hnode2:~#scp /root/.ssh/id_rsa.pub root@hnode1:/tmp
root@hnode1:~#cat /tmp/id_rsa.pub >> ~/.ssh/authorized_keys

root@hnode2:/opt# cat /root/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
root@hnode2:~#cat /tmp/id_rsa.pub >> ~/.ssh/authorized_keys
root@hnode2:/opt# cat /root/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
root@hnode2:~#chmod 700 ~/.ssh
root@hnode2:~#chmod 600 ~/.ssh/authorized_keys

root@hnode1:/# wget http://apache.openmirror.de/hadoop/common/hadoop-2.6.2/hadoop-2.6.2.tar.gz
root@hnode1:/#tar -xvf hadoop-2.6.2.tar.gz
root@hnode1:/#mv hadoop-2.6.2 /opt/

root@hnode1:/# vim /opt/hadoop-2.6.2/etc/hadoop/core-site.xml

<property>
<name>fs.defaultFS</name>
<value>hdfs://hnode1.weave.local/</value>
</property>

root@hnode1:/# vim /opt/hadoop-2.6.2/etc/hadoop/hdfs-site.xml

<property>
<name>dfs.namenode.name.dir</name>
<value>/disk1/hdfs/name,/remote/hdfs/name</value>
</property>

root@hnode1:/# vim /opt/hadoop-2.6.2/etc/hadoop/yarn-site.xml

<property>
<name>yarn.resourcemanager.hostname</name>
<value>hnode1.weave.local</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/disk1/nm-local-dir,/disk2/nm-local-dir</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>16384</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>16</value>
</property>

root@hnode1:/# mkdir -p /disk1/hdfs/name /remote/hdfs/name /disk1/nm-local-dir /disk2/nm-local-dir

root@hnode2:/# wget http://apache.openmirror.de/hadoop/common/hadoop-2.6.2/hadoop-2.6.2.tar.gz
root@hnode2:/#tar -xvf hadoop-2.6.2.tar.gz
root@hnode2:/#mv hadoop-2.6.2 /opt/

root@hnode2:/# vim /opt/hadoop-2.6.2/etc/hadoop/core-site.xml

<property>
<name>fs.defaultFS</name>
<value>hdfs://hnode1.weave.local/</value>
</property>

root@hnode2:/# vim /opt/hadoop-2.6.2/etc/hadoop/hdfs-site.xml

<property>
<name>dfs.datanode.data.dir</name>
<value>/disk1/hdfs/data,/disk2/hdfs/data</value></property>

root@hnode2:/# mkdir -p /disk1/hdfs/data /disk2/hdfs/data

name@host:/#HN1_IP=$(docker inspect -f '{{ .NetworkSettings.IPAddress }}' hnode1)

name@host:/#HN2_IP=$(docker inspect -f '{{ .NetworkSettings.IPAddress }}' hnode2)

name@host:/#scp ~/Downloads/jdk-8u65-linux-x64.tar.gz root@$HN1_IP:/opt

name@host:/#scp ~/Downloads/jdk-8u65-linux-x64.tar.gz root@$HN2_IP:/opt

root@hnode1:/opt# tar -xvf jdk-8u65-linux-x64.tar.gz
root@hnode1:~# echo "JAVA_HOME=jdk1.8.0_65" >> /etc/environment
root@hnode1:~# echo "PATH=$JAVA_HOME/bin:$PATH" >> /etc/environment
root@hnode1:~#source /etc/environment
root@hnode1:~# export JAVA_HOME=jdk1.8.0_65

root@hnode2:/opt# tar -xvf jdk-8u65-linux-x64.tar.gz
root@hnode2:~# echo "JAVA_HOME=jdk1.8.0_65" >> /etc/environment
root@hnode2:~# echo "PATH=$JAVA_HOME/bin:$PATH" >> /etc/environment
root@hnode2:~#source /etc/environment
root@hnode2:~# export JAVA_HOME=jdk1.8.0_65

root@hnode1:/# /opt/hadoop-2.6.2/bin/hdfs namenode -format mycluster

root@hnode1:~# /opt/hadoop-2.6.2/sbin/start-dfs.sh
root@hnode2:~# /opt/hadoop-2.6.2/sbin/start-dfs.sh

Install Cloudera CDH Cluster on Docker

name@host:/#wget https://raw.github.com/zettio/weave/master/weave
name@host:/#sudo mv weave /usr/local/bin/
name@host:/#sudo chmod +x /usr/local/bin/weave
name@host:/#weave launch
name@host:/#eval $(weave env)
name@host:/#docker run --name=cnode1 --hostname=cnode1.weave.local -it ubuntu /bin/bash
name@host:/#eval $(weave env)
name@host:/#docker run --name=cnode2 --hostname=cnode2.weave.local -it ubuntu /bin/bash

root@cnode1:~#passwd
Enter new UNIX password:root
root@cnode1:~#apt-get install openssh-server
root@cnode1:~#sed -i 's/PermitRootLogin without-password/PermitRootLogin yes/' /etc/ssh/sshd_config
root@cnode1:~#/etc/init.d/ssh start
root@cnode1:~#ssh-keygen

root@cnode2:~#passwd
Enter new UNIX password:root
root@cnode2:~#apt-get install openssh-server
root@cnode2:~#sed -i 's/PermitRootLogin without-password/PermitRootLogin yes/' /etc/ssh/sshd_config
root@cnode2:~#/etc/init.d/ssh start
root@cnode2:~#ssh-keygen

root@cnode1:~#scp /root/.ssh/id_rsa.pub root@cnode2:/tmp
root@cnode2:~#cat /tmp/id_rsa.pub >> ~/.ssh/authorized_keys
root@cnode2:~#chmod 700 ~/.ssh
root@cnode2:~#chmod 600 ~/.ssh/authorized_keys

root@cnode1:~#wget http://archive.cloudera.com/cm5/installer/latest/cloudera-manager-installer.bin
root@cnode1:~#chmod u+x cloudera-manager-installer.bin
root@cnode1:~#sudo ./cloudera-manager-installer.bin

root@cnode1:/# ifconfig
eth0 Link encap:Ethernet HWaddr 02:42:ac:11:00:07
inet addr:172.17.0.7 Bcast:0.0.0.0
// open http://172.17.0.7:7180
// login with admin/admin
// select Cloudera Enterprise
Data Hub Edition Trial
//click continue
//enter cnode1.weave.local
cnode2.weave.local as server
//click continue
// select Install Oracle Java SE Development Kit (JDK)
// select Single User Mode
// enter root/root as username and password
// click continue
// click continue
// click finish

Ionic ngCordova social-sharing


ionic start ionic-ngcordova-social-sharing blank
cd ionic-ngcordova-social-sharing
bower install ngCordova
cordova platform add android
cordova plugin add https://github.com/EddyVerbruggen/SocialSharing-PhoneGap-Plugin.git
in index.html change to following:
<head>
...
<script src="lib/ngCordova/dist/ng-cordova.js"></script>
...
</head>
....
<body ng-controller="AppCtrl" >
....
<ion-content>
<textarea placeholder="Comments" ng-model="comments"></textarea>
<button class="button button-full button-positive" on-click="share()" on-tap="share()">
Share
</button>
<button class="button button-full button-positive" on-tap="shareByEmail()">
Share By Email
</button>

</ion-content>

...
</body>
...

in app.js change following:

angular.module('starter', ['ionic','ngCordova'])
...
.run(...
...
).controller('AppCtrl', function($scope, $cordovaSocialSharing) {$scope.comments = "";
$scope.share = function() {
$cordovaSocialSharing
.share($scope.comments, "My comment") // Share via native share sheet
.then(function(result) {
// Success!
}, function(err) {
// An error occurred. Show a message to the user
});
}
$scope.shareByEmail = function() {

$cordovaSocialSharing
.shareViaEmail($scope.comments, "My comment", "someone@example.com")
.then(function(result) {
// Success!
}, function(err) {
// An error occurred. Show a message to the user
});
}
});

ionic build --android

ionic run --emulator

Install Hortonworks Hadoop with Ambari on Docker


name@host:/#docker run --name=data1 --hostname=data1 -it ubuntu /bin/bash

name@host:/#docker run --name=data2 --hostname=data2 -it ubuntu /bin/bash

name@host:/#docker run --name=name --hostname=name --link data1:data1 --link data2:data2 -it ubuntu /bin/bash

name@host:/#docker run --name=ambari --hostname=ambari --link data1:data1 --link data2:data2 --link name:name -it ubuntu /bin/bash

name@host:/#sudo bash -c "curl https://raw.githubusercontent.com/jpetazzo/pipework/master/pipework > /usr/local/bin/pipework"

name@host:/#sudo chmod u+x /usr/local/bin/pipework

name@host:/#sudo apt-get install iproute2

name@host:/#sudo /usr/local/bin/pipework br1 data1 192.168.0.50/24

name@host:/#sudo /usr/local/bin/pipework br1 data2 192.168.0.51/24

name@host:/#sudo /usr/local/bin/pipework br1 name 192.168.0.53/24

name@host:/#sudo /usr/local/bin/pipework br1 ambari 192.168.0.54/24

root@data1:~#passwd
Enter new UNIX password:root

root@data1:~#apt-get install ssh

root@data1:~#sed -i 's/PermitRootLogin without-password/PermitRootLogin yes/' /etc/ssh/sshd_config

root@data1:~#/etc/init.d/ssh start
root@data1:~#ssh-keygen
root@data1:~#apt-get update

root@data2:~#passwd
Enter new UNIX password:root

root@data2:~#apt-get install ssh

root@data2:~#sed -i 's/PermitRootLogin without-password/PermitRootLogin yes/' /etc/ssh/sshd_config

root@data2:~#/etc/init.d/ssh start
root@data2:~#ssh-keygen
root@data2:~#apt-get update

root@name:~#passwd
Enter new UNIX password:root

root@name:~#apt-get install ssh

root@name:~#sed -i 's/PermitRootLogin without-password/PermitRootLogin yes/' /etc/ssh/sshd_config

root@name:~#/etc/init.d/ssh start
root@name:~#ssh-keygen

root@ambari:~#passwd
Enter new UNIX password:root

root@ambari:~#apt-get install ssh

root@ambari:~#sed -i 's/PermitRootLogin without-password/PermitRootLogin yes/' /etc/ssh/sshd_config

root@ambari:~#/etc/init.d/ssh start
root@ambari:~#ssh-keygen

root@name:~# scp /root/.ssh/id_rsa.pub root@data1:/tmp
root@name:~# scp /root/.ssh/id_rsa.pub root@data2:/tmp

root@data1:~#cat /tmp/id_rsa.pub >> ~/.ssh/authorized_keys
root@data1:~#chmod 700 ~/.ssh
root@data1:~#chmod 600 ~/.ssh/authorized_keys

root@data2:~#cat /tmp/id_rsa.pub >> ~/.ssh/authorized_keys
root@data2:~#chmod 700 ~/.ssh
root@data2:~#chmod 600 ~/.ssh/authorized_keys

root@ambari:~# scp /root/.ssh/id_rsa.pub root@name:/tmp
root@ambari:~# scp /root/.ssh/id_rsa.pub root@data1:/tmp/ambari_id_rsa.pub
root@ambari:~# scp /root/.ssh/id_rsa.pub root@data2:/tmp/ambari_id_rsa.pub

root@data1:~#cat /tmp/ambari_id_rsa.pub >> ~/.ssh/authorized_keys
root@data1:~#/etc/init.d/ssh restart
root@data2:~#cat /tmp/ambari_id_rsa.pub >> ~/.ssh/authorized_keys
root@data2:~#/etc/init.d/ssh restart

root@name:~#cat /tmp/id_rsa.pub >> ~/.ssh/authorized_keys
root@name:~#/etc/init.d/ssh restart
root@name:~#chmod 700 ~/.ssh
root@name:~#chmod 600 ~/.ssh/authorized_keys

root@ambari:/#apt-get update
root@ambari:/#apt-get install postgresql postgresql-contrib
root@ambari:/#/etc/init.d/postgresql start

root@data1:/#wget -nv http://public-repo-1.hortonworks.com/ambari/ubuntu14/2.x/updates/2.1.2/ambari.list -O /etc/apt/sources.list.d/ambari.list

root@data1:/#apt-key adv --recv-keys --keyserver keyserver.ubuntu.com B9733A7A07513CAD

root@data1:/#apt-get update

root@data2:/#wget -nv http://public-repo-1.hortonworks.com/ambari/ubuntu14/2.x/updates/2.1.2/ambari.list -O /etc/apt/sources.list.d/ambari.list

root@data2:/#apt-key adv --recv-keys --keyserver keyserver.ubuntu.com B9733A7A07513CAD

root@data2:/#apt-get update

root@name:/#wget -nv http://public-repo-1.hortonworks.com/ambari/ubuntu14/2.x/updates/2.1.2/ambari.list -O /etc/apt/sources.list.d/ambari.list

root@name:/#apt-key adv --recv-keys --keyserver keyserver.ubuntu.com B9733A7A07513CAD

root@name:/#apt-get update

root@ambari:/#wget -nv http://public-repo-1.hortonworks.com/ambari/ubuntu14/2.x/updates/2.1.2/ambari.list -O /etc/apt/sources.list.d/ambari.list

root@ambari:/#apt-key adv --recv-keys --keyserver keyserver.ubuntu.com B9733A7A07513CAD

root@ambari:/#apt-get update

root@ambari:/#apt-get install ambari-server
root@ambari:/#apt-get install ambari-agent

root@ambari:/#ambari-server setup
// select default answer for all questions during setup

root@ambari:/#ambari-server start

name@host:/#docker inspect -f '{{ .NetworkSettings.IPAddress }}' ambari

172.17.0.17

// open in browser http://172.17.0.17:8080 and login with admin/admin as username/password
// click on Launch Install Wizard button
// enter mycluster as cluster name then click next
// select HDP 2.3 then click next
// in target hosts enter
// name
// data1
// data2
// and

root@ambari:/# cat ~/.ssh/id_rsa
-----BEGIN RSA PRIVATE KEY-----
MIIEpAIBAAKCAQEAk...47QxOaBp4tIy+9Ezg==
-----END RSA PRIVATE KEY-----

// copy the private key to ssh private key text box
// then click register and Confirm button
// you should see every host is registered successfully , click next button
// in page Choose Services click next button
// in page Assign Masters, select HDFS , YARN + MapReduce2, ZooKeeper,Ambari Metrics , then click next button
// in page Assign Slaves and Clients, select name as name node and data1 as Sname(secondary name node)
// in Assign Slaves and Clients and Customize Services pages, click next button, and after Review page, click Deploy button