Monday, 20 October 2014

JSON File Processing through HIVE

JSON File Processing through HIVE

Step 1: Add the resources

-- Register the extra jars with the current Hive session before creating the table.
ADD JAR /home/hadoop/work/hive-0.10.0/lib/hive-contrib-0.10.0.jar;
ADD JAR /home/hadoop/work/hive-0.10.0/lib/hive-metastore-0.10.0.jar;
-- NOTE(review): presumably this jar holds the JSON SerDe class named in the CREATE TABLE of Step 2 (confirm against your build).
ADD JAR /home/hadoop/work/JSON-Serde.jar;

Step 2: Create the table

-- Table over JSON text, one JSON document per line.
-- city_code holds a JSON array of integers and contact holds a nested JSON object.
-- The SerDe must be referenced by its fully qualified class name. The JSONSerDe
-- source linked in the note below lives in package com.cloudera.hive.serde.
-- NOTE(review): if your jar was built from a repackaged copy, use that package instead.
CREATE TABLE record (
   id INT,
   city_code ARRAY<INT>,
   email STRING,
   contact STRUCT<Mobile_no:STRING, Telephone_no:STRING>
) ROW FORMAT SERDE 'com.cloudera.hive.serde.JSONSerDe';

Step 3: Load the data

The data looks like this:
{ "id": 1, "city_code": [ 1, 2, 3 ], "email": "joseph@gmail.com", "contact": { "Mobile_no": "val1", "Telephone_no": "val2" } }
{ "id": 2, "city_code": [ 4, 5, 6 ], "email": "james@gmail.com", "contact": { "Mobile_no": "val3", "Telephone_no": "val4" } }
{ "id": 3, "city_code": [ 7, 8, 9 ], "email": "rony@gmail.com", "contact": { "Mobile_no": "val5", "Telephone_no": "val6" } }

 -- Replace the current contents of record with the rows from the local JSON file.
 LOAD DATA LOCAL INPATH '/home/hadoop/work/record_data.txt'
 OVERWRITE INTO TABLE record;

 Step 4: Retrieve the data

 -- Show every row, then read one field of the nested contact struct for a single id.
 SELECT *
 FROM record;

 SELECT contact.Mobile_no
 FROM record
 WHERE id = 1;
 
Note: The source code for the JSONSerDe class is available here (build it into a jar before running Step 1): https://github.com/cloudera/cdh-twitter-example/blob/master/hive-serdes/src/main/java/com/cloudera/hive/serde/JSONSerDe.java

Hive Connection with MongoDB

Hive Connection with MongoDB

Do the processing in Hive and store the result in MongoDB.


Download the jars below from 'https://github.com/yc-huang/Hive-mongo/tree/master/release'

-- Register the hive-mongo release jars with the current Hive session.
ADD JAR /home/hadoop/work/Hive-mongo-master/release/hive-mongo-0.0.2.jar;
ADD JAR /home/hadoop/work/Hive-mongo-master/release/hive-mongo-0.0.2-jar-with-dependencies.jar;
ADD JAR /home/hadoop/work/hive-0.10.0/lib/hive-metastore-0.10.0.jar;
-- Supporting libraries: Guava and the MongoDB Java driver.
-- NOTE(review): these may already be bundled in the jar-with-dependencies above (confirm before removing any line).
ADD JAR /home/hadoop/work/guava-r06.jar;
ADD JAR /home/hadoop/work/mongo-java-driver-2.6.3.jar;

-- Staging table for comma-delimited text rows of the form id,name,age.
CREATE TABLE IF NOT EXISTS mongo_test (
    id   INT,
    name STRING,
    age  INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';

-- Load the local comma-delimited file into the staging table (no OVERWRITE, so existing rows are kept).
LOAD DATA LOCAL INPATH '/home/hadoop/work/mongodata.txt'
INTO TABLE mongo_test;

-- External table whose storage is delegated to the hive-mongo storage handler.
-- The mapping lists the MongoDB field names for the Hive columns
-- (presumably in column order, so id maps to _id. Confirm against the handler docs).
CREATE EXTERNAL TABLE mongotable (
    id   INT,
    name STRING,
    age  INT
)
STORED BY "org.yong3.hive.mongo.MongoStorageHandler"
WITH SERDEPROPERTIES (
    "mongo.column.mapping" = "_id,name,age"
)
TBLPROPERTIES (
    "mongo.host"       = "localhost",
    "mongo.port"       = "27017",
    "mongo.db"         = "db",
    "mongo.collection" = "ravi"
);

-- Write the staged rows into the MongoDB-backed table.
INSERT OVERWRITE TABLE mongotable
SELECT
    id,
    name,
    age
FROM mongo_test;
Related Posts Plugin for WordPress, Blogger...