HCatalog + Pig


$DERBY_HOME/bin/startNetworkServer

$HADOOP_HOME/sbin/start-all.sh

$HIVE_HOME/bin/hiveserver2

$HIVE_HOME/bin/hive --service metastore

$HADOOP_HOME/bin/hadoop fs -mkdir hdfs://localhost/hcatalog-example

$HADOOP_HOME/bin/hadoop fs -put /tmp/test-dataset.csv hdfs://localhost/hcatalog-example

$HADOOP_HOME/bin/hadoop fs -cat hdfs://localhost/hcatalog-example/test-dataset.csv | head -n 4

playerID,yearID,gameNum,gameID,teamID,lgID,GP,startingPos
aaronha01,1955,0,NLS195507120,ML1,NL,1,
aaronha01,1956,0,ALS195607100,ML1,NL,1,
aaronha01,1957,0,NLS195707090,ML1,NL,1,9

./hcat -e "CREATE TABLE default.gamedataset (playerID STRING,yearID INT,gameNum INT ,gameID STRING ,teamID STRING ,lgID STRING ,GP INT,startingPos INT) PARTITIONED BY (country STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';"

./hcat -e "alter table default.gamedataset add partition (country='DE') location '/hcatalog-example/'"

vim $PIG_HOME/conf/pig.properties

pig.load.default.statements=/opt/pig-0.15.0/.pigbootup

vim $PIG_HOME/.pigbootup

REGISTER /opt/apache-hive-1.2.1-bin/hcatalog/share/hcatalog/hcatalog-core-1.2.1.jar;
REGISTER /opt/apache-hive-1.2.1-bin/lib/hive-exec-1.2.1.jar;
REGISTER /opt/apache-hive-1.2.1-bin/lib/hive-metastore-1.2.1.jar;

vim $HOME/.bashrc

export PIG_OPTS=-Dhive.metastore.uris=thrift://localhost:9083
export PIG_CLASSPATH=$HCAT_HOME/share/hcatalog/*:$HIVE_HOME/lib/*

$PIG_HOME/bin/pig -useHCatalog

A = load 'default.gamedataset' using org.apache.hive.hcatalog.pig.HCatLoader();
dump A;

Advertisements

Install Apache Hive


tar -xvf db-derby-10.11.1.1-bin.tar.gz
tar -xvf apache-hive-1.2.1-bin.tar.gz
vim ~/.bashrc

export HADOOP_HOME=/opt/hadoop-2.6.2
export DERBY_HOME=/opt/db-derby-10.11.1.1-bin
export PATH=$PATH:$DERBY_HOME/bin
export CLASSPATH=$CLASSPATH:$DERBY_HOME/lib/derby.jar:$DERBY_HOME/lib/derbytools.jar
export HIVE_HOME=/opt/apache-hive-1.2.1-bin

mkdir $DERBY_HOME/data
cp $HIVE_HOME/conf/hive-default.xml.template $HIVE_HOME/conf/hive-site.xml
vim $HIVE_HOME/conf/hive-site.xml

<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:derby://localhost:1527/metastore_db;create=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
</configuration>

vim $HIVE_HOME/conf/jpox.properties

javax.jdo.PersistenceManagerFactoryClass = org.jpox.PersistenceManagerFactoryImpl
org.jpox.autoCreateSchema = false
org.jpox.validateTables = false
org.jpox.validateColumns = false
org.jpox.validateConstraints = false
org.jpox.storeManagerType = rdbms
org.jpox.autoCreateSchema = true
org.jpox.autoStartMechanismMode = checked
org.jpox.transactionIsolation = read_committed
javax.jdo.option.DetachAllOnCommit = true
javax.jdo.option.NontransactionalRead = true
javax.jdo.option.ConnectionDriverName = org.apache.derby.jdbc.ClientDriver
javax.jdo.option.ConnectionURL = jdbc:derby://localhost:1527/metastore_db;create=true
javax.jdo.option.ConnectionUserName = APP
javax.jdo.option.ConnectionPassword = mine

$DERBY_HOME/bin/startNetworkServer

$HADOOP_HOME/sbin/start-all.sh

$HADOOP_HOME/bin/hadoop fs -mkdir /tmp
$HADOOP_HOME/bin/hadoop fs -mkdir -p /user/hive/warehouse
$HADOOP_HOME/bin/hadoop fs -chmod g+w /tmp
$HADOOP_HOME/bin/hadoop fs -chmod g+w /user/hive/warehouse

cp $DERBY_HOME/lib/derbyclient.jar $HIVE_HOME/lib/

$HIVE_HOME/bin/hiveserver2

$HIVE_HOME/bin/hive --service metastore

$HIVE_HOME/bin$ ./beeline
Beeline version 1.2.1 by Apache Hive
beeline> !connect jdbc:hive2://localhost:10000/default "hive" ""
Connecting to jdbc:hive2://localhost:10000/default
Connected to: Apache Hive (version 1.2.1)
Driver: Hive JDBC (version 1.2.1)
Transaction isolation: TRANSACTION_REPEATABLE_READ
0: jdbc:hive2://localhost:10000/default>