Apache Sqoop + MySQL + Hadoop


wget http://www-us.apache.org/dist/sqoop/1.99.6/sqoop-1.99.6-bin-hadoop200.tar.gz
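If the mirror URL above has gone away (old Sqoop releases get pruned from the mirrors), the same tarball should still be available from the Apache archive:

wget https://archive.apache.org/dist/sqoop/1.99.6/sqoop-1.99.6-bin-hadoop200.tar.gz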

tar -xvf sqoop-1.99.6-bin-hadoop200.tar.gz

cd sqoop-1.99.6-bin-hadoop200

vim server/conf/catalina.properties

Extend common.loader so the embedded Tomcat can load the Hadoop (and, if present, Hive) jars; adjust the /opt/hadoop-2.6.3 paths to match your install:

common.loader=${catalina.base}/lib,${catalina.base}/lib/*.jar,${catalina.home}/lib,${catalina.home}/lib/*.jar,${catalina.home}/../lib/*.jar,/opt/hadoop-2.6.3/share/hadoop/common/*.jar,/opt/hadoop-2.6.3/share/hadoop/common/lib/*.jar,/opt/hadoop-2.6.3/share/hadoop/hdfs/*.jar,/opt/hadoop-2.6.3/share/hadoop/hdfs/lib/*.jar,/opt/hadoop-2.6.3/share/hadoop/mapreduce/*.jar,/opt/hadoop-2.6.3/share/hadoop/mapreduce/lib/*.jar,/opt/hadoop-2.6.3/share/hadoop/yarn/*.jar,/opt/hadoop-2.6.3/share/hadoop/yarn/lib/*.jar,/usr/lib/hive/lib/*.jar

mkdir lib
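This lib directory is matched by the ${catalina.home}/../lib/*.jar entry in common.loader above, and it is where the MySQL JDBC driver needs to go; Sqoop does not bundle it. One way to fetch it from Maven Central (the Connector/J version here is just an example):

wget https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.38/mysql-connector-java-5.1.38.jar -P lib/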

vim server/conf/sqoop.properties

Point the MapReduce submission engine at the Hadoop configuration directory:

org.apache.sqoop.submission.engine.mapreduce.configuration.directory=/opt/hadoop-2.6.3/etc/hadoop

./bin/sqoop2-tool verify
...
Verification was successful.

./bin/sqoop2-server start
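Assuming the default port of 12000, a quick sanity check that the server is answering:

curl http://localhost:12000/sqoop/version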

$HADOOP_HOME/sbin/start-all.sh
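On Hadoop 2.x the explicit equivalent is start-dfs.sh plus start-yarn.sh; either way, jps should then show the expected daemons:

$HADOOP_HOME/sbin/start-dfs.sh
$HADOOP_HOME/sbin/start-yarn.sh
jps    # expect NameNode, DataNode, ResourceManager, NodeManager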

sudo apt-get install mysql-server

mysql -p -uroot -e "create database testdb;"

mysql -p -uroot -e "CREATE TABLE testdb.exampletable (id INT,data VARCHAR(100));"

mysql -p -uroot -e "INSERT INTO testdb.exampletable(id,data) VALUES(1,'test data');"

./bin/sqoop2-shell
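The connector ids used below (1 for the generic JDBC connector, 3 for HDFS) are specific to this install and can differ; list yours first:

sqoop:000> show connector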

sqoop:000> create link -c 1
Creating link for connector with id 1
0 [main] WARN org.apache.hadoop.util.NativeCodeLoader - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Please fill following values to create new link object
Name: mysql

Link configuration

JDBC Driver Class: com.mysql.jdbc.Driver
JDBC Connection String: jdbc:mysql://localhost/testdb
Username: root
Password: ****
JDBC Connection Properties:
There are currently 0 values in the map:
entry# protocol=tcp
There are currently 1 values in the map:
protocol = tcp
entry#
New link was successfully created with validation status OK and persistent id 1

sqoop:000> create link -c 3
Creating link for connector with id 3
Please fill following values to create new link object
Name: hdfs

Link configuration

HDFS URI: hdfs://localhost:8020/
Hadoop conf directory: /opt/hadoop-2.6.3/etc/hadoop
New link was successfully created with validation status OK and persistent id 2
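The HDFS URI has to match fs.defaultFS from core-site.xml (8020 here; many single-node setups use 9000 instead). One way to check:

$HADOOP_HOME/bin/hdfs getconf -confKey fs.defaultFS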

sqoop:000> create job -f 1 -t 2
Creating job for links with from id 1 and to id 2
Please fill following values to create new job object
Name: sqoopy

From database configuration

Schema name: testdb
Table name: exampletable
Table SQL statement:
Table column names: id,data
Partition column name: id
Null value allowed for the partition column:
Boundary query:

Incremental read

Check column:
Last value:

To HDFS configuration

Override null value:
Null value:
Output format:
0 : TEXT_FILE
1 : SEQUENCE_FILE
Choose: 0
Compression format:
0 : NONE
1 : DEFAULT
2 : DEFLATE
3 : GZIP
4 : BZIP2
5 : LZO
6 : LZ4
7 : SNAPPY
8 : CUSTOM
Choose: 0
Custom compression format:
Output directory: sqoop
Append mode:

Throttling resources

Extractors:
Loaders:
New job was successfully created with validation status OK and persistent id 1
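The new job can be confirmed with:

sqoop:000> show job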

sqoop:000> start job -j 1
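The job runs as a MapReduce job on the cluster; its progress can be polled from the shell (or watched in the YARN UI):

sqoop:000> status job -j 1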

Once the job finishes, read the imported rows back out of HDFS:

$HADOOP_HOME/bin/hdfs dfs -cat /user//sqoop/ca29e7ba-1155-4614-8529-4e11b0260170.txt
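The file name is generated per run (and the doubled slash above elides the HDFS user name), so list the output directory to find yours:

$HADOOP_HOME/bin/hdfs dfs -ls /user/<your-user>/sqoop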