WordCount MapReduce with Scalding

git clone https://github.com/scalding-io/ProgrammingWithScalding

cd ProgrammingWithScalding/chapter2/

mvn clean install

hadoop fs -mkdir -p /data/input

hadoop fs -mkdir -p /data/output

echo "This is a happy day. A day to remember" > /tmp/input.txt

hadoop fs -put /tmp/input.txt /data/input

hadoop jar /root/repo/ProgrammingWithScalding/chapter2/target/chapter2-0-jar-with-dependencies.jar com.twitter.scalding.Tool WordCountJob --local --input /data/input/input.txt --output /data/output/output.txt

cat /data/output/output.txt
a 2
day 1
day. 1
happy 1
is 1
remember 1
this 1
to 1


Unit Test for WordCount MapReduce

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.*;

public class WordCountTest {

public void mapperTest() throws IOException, InterruptedException {
Text value = new Text("Hello");
new MapDriver().withMapper(new TokenizerMapper())
.withInput(new IntWritable(), value).withOutput(new Text("Hello"), new IntWritable(1)).runTest();

public void reducerTest() throws IOException, InterruptedException {
new ReduceDriver().withReducer(new IntSumReducer())
.withInput(new Text("Hello"), Arrays.asList(new IntWritable(1), new IntWritable(1)))
.withOutput(new Text("Hello"), new IntWritable(2)).runTest();