How to Connect Hive and Neo4j? How to load Hive Data into Neo4j database?

I have the Hortonworks Sandbox running with HiveServer2 up. If you do not have the Hortonworks Sandbox yet, please download it first.

1. Select * from xademo.customer_details (Hive table in Hortonworks Sandbox under xademo schema)
Schema: PhoneNumber,Plan,date,status,balance,imei,region. All are String datatypes.
8 9
2. Create the Hive JDBC connection. Read the rows from the Hive table and add them to a List, as shown below in HiveJdbcClient.java.
package org.apache.hive.jdbc;
import java.sql.SQLException;
 import java.sql.Connection;
 import java.sql.ResultSet;
 import java.sql.Statement;
 import java.sql.DriverManager;
 import java.util.ArrayList;
 import java.util.List;
public class HiveJdbcClient {

    /** Fully-qualified class name of the HiveServer2 JDBC driver. */
    private static final String driverName = "org.apache.hive.jdbc.HiveDriver";

    /** JDBC URL of the HiveServer2 instance and the schema to connect to. */
    private static final String JDBC_URL = "jdbc:hive2://localhost:10000/xademo";

    /** Credentials passed to {@link DriverManager#getConnection(String, String, String)}. */
    private static final String JDBC_USER = "root";
    private static final String JDBC_PASSWORD = "hadoop";

    /** Query that fetches the rows to be loaded; limited to 20 rows. */
    private static final String QUERY = "select * from xademo.customer_details limit 20";

    /**
     * Reads the rows returned by {@link #QUERY} and maps each one to a
     * {@link CustomerDetails}. Columns are read by position (1 to 7) in the
     * order phone number, plan, date, status, balance, imei, region.
     *
     * <p>The connection, statement and result set are closed before this
     * method returns. If the driver cannot be loaded or a SQL error occurs,
     * the stack trace is printed and the JVM exits with status 1.
     *
     * @return the rows read from Hive; never {@code null}, possibly empty
     */
    public List<CustomerDetails> getCustomerDetailsList() {
        List<CustomerDetails> customerDetailses = new ArrayList<>();
        try {
            Class.forName(driverName);
            try (Connection con = DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASSWORD)) {
                System.out.println("After Connection" + con);
                // Resources are closed in reverse order: result set, statement, then connection.
                try (Statement stmt = con.createStatement();
                     ResultSet res = stmt.executeQuery(QUERY)) {
                    while (res.next()) {
                        // One CustomerDetails object per row.
                        CustomerDetails customerDetails = new CustomerDetails();
                        customerDetails.setPhNum(res.getString(1));
                        customerDetails.setPlan(res.getString(2));
                        customerDetails.setDate(res.getString(3));
                        customerDetails.setStatus(res.getString(4));
                        customerDetails.setBalance(res.getString(5));
                        customerDetails.setImei(res.getString(6));
                        customerDetails.setRegion(res.getString(7));
                        customerDetailses.add(customerDetails);
                    }
                }
            }
        } catch (ClassNotFoundException | SQLException e) {
            e.printStackTrace();
            System.exit(1);
        }
        return customerDetailses;
    }
}
Create the schema mapping in the CustomerDetails class, with a getter and a setter for each column.
/*
 This Class is created to represent one row of the table customer_details
 Set get,set values of each columns
 */
 package org.apache.hive.jdbc;
/**
 * Plain value holder for one row of the {@code customer_details} table.
 * Every column is kept as a {@code String}; each has a getter and a setter.
 *
 * @author gfp2ram
 */
public class CustomerDetails {

    // Fields are listed in the table's column order.
    private String phNum;
    private String plan;
    private String date;
    private String status;
    private String balance;
    private String imei;
    private String region;

    /** @return the phone number */
    public String getPhNum() {
        return this.phNum;
    }

    /** @param phNum the phone number to store */
    public void setPhNum(String phNum) {
        this.phNum = phNum;
    }

    /** @return the plan */
    public String getPlan() {
        return this.plan;
    }

    /** @param plan the plan to store */
    public void setPlan(String plan) {
        this.plan = plan;
    }

    /** @return the date */
    public String getDate() {
        return this.date;
    }

    /** @param date the date to store */
    public void setDate(String date) {
        this.date = date;
    }

    /** @return the status */
    public String getStatus() {
        return this.status;
    }

    /** @param status the status to store */
    public void setStatus(String status) {
        this.status = status;
    }

    /** @return the balance */
    public String getBalance() {
        return this.balance;
    }

    /** @param balance the balance to store */
    public void setBalance(String balance) {
        this.balance = balance;
    }

    /** @return the imei */
    public String getImei() {
        return this.imei;
    }

    /** @param imei the imei to store */
    public void setImei(String imei) {
        this.imei = imei;
    }

    /** @return the region */
    public String getRegion() {
        return this.region;
    }

    /** @param region the region to store */
    public void setRegion(String region) {
        this.region = region;
    }
}
3. Read the Hive data in this class (NodeCreationNeo4j.java) and load it into Neo4j.
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
 package org.apache.hive.jdbc;
/**
 *
 * @author Varatharajan Giri Ramanathan
 */
 import java.util.List;
 import org.neo4j.cypher.javacompat.ExecutionEngine;
 import org.neo4j.cypher.javacompat.ExecutionResult;
 import org.neo4j.graphdb.Direction;
 import org.neo4j.graphdb.GraphDatabaseService;
 import org.neo4j.graphdb.Label;
 import org.neo4j.graphdb.Node;
 import org.neo4j.graphdb.Relationship;
 import org.neo4j.graphdb.RelationshipType;
 import org.neo4j.graphdb.Transaction;
 import org.neo4j.graphdb.factory.GraphDatabaseFactory;
/**
 * Loads the rows returned by {@link HiveJdbcClient} into an embedded Neo4j
 * database. For every row three nodes are created (Phone, CallPlan, Region)
 * and linked as Phone -[PLN_STATUS]-> CallPlan -[BELONGS_TO]-> Region.
 */
public class NodeCreationNeo4j {

    /** Filesystem location of the embedded Neo4j store. */
    private static final String Hive_Neo4j_path = "C:\\Users\\gfp2ram\\Documents\\Neo4j\\default.graphdb";

    // The most recently created nodes and relationships; removeData() operates on these.
    private static Node NphNum;
    private static Node Nplan;
    private static Node Nregion;
    private static Relationship relation1, relation2;
    private static GraphDatabaseService graphDataService;

    /** Labels attached to the created nodes. */
    public enum NodeTypes implements Label {
        Phone, CallPlan, Region
    }

    /** Relationship types used between the created nodes. */
    public enum RelTypes implements RelationshipType {
        PLN_STATUS, BELONGS_TO
    }

    /**
     * Reads the customer rows from Hive, writes them to Neo4j and shuts the
     * database down, even when loading fails.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        NodeCreationNeo4j sample = new NodeCreationNeo4j();
        HiveJdbcClient hive_jdbc = new HiveJdbcClient();
        try {
            createDatabase(hive_jdbc.getCustomerDetailsList());
            // Call sample.removeData() here to delete the last created row again.
        } finally {
            sample.shutdown();
        }
    }

    /**
     * Opens the embedded database and creates the nodes and relationships for
     * every entry of the list inside a single transaction.
     *
     * @param customerDetailses rows to load; {@code null} is treated as empty
     */
    private static void createDatabase(List<CustomerDetails> customerDetailses) {
        graphDataService = new GraphDatabaseFactory().newEmbeddedDatabase(Hive_Neo4j_path);
        if (customerDetailses == null) {
            return;
        }
        // try-with-resources closes the transaction; without the close the
        // work marked with success() is never committed.
        try (Transaction transaction = graphDataService.beginTx()) {
            for (CustomerDetails customerDetails : customerDetailses) {
                NphNum = graphDataService.createNode(NodeTypes.Phone);
                Nplan = graphDataService.createNode(NodeTypes.CallPlan);
                Nregion = graphDataService.createNode(NodeTypes.Region);

                NphNum.setProperty("phone#", customerDetails.getPhNum());
                NphNum.setProperty("customername", "Test Customer");
                NphNum.setProperty("Location", "Arizona-Pheonix");

                Nplan.setProperty("planname", customerDetails.getPlan());
                Nplan.setProperty("plantype", "NighDiscount");
                Nplan.setProperty("discountprice", "$0.45");

                Nregion.setProperty("region", customerDetails.getRegion());
                Nregion.setProperty("state", "AZ");
                Nregion.setProperty("country", "USA");

                relation1 = NphNum.createRelationshipTo(Nplan, RelTypes.PLN_STATUS);
                relation1.setProperty("DB Location", "Hive");

                relation2 = Nplan.createRelationshipTo(Nregion, RelTypes.BELONGS_TO);
                relation2.setProperty("Sandbox type", "Hortonworks");

                System.out.println(NphNum.getProperty("phone#").toString() + " IS IN THE PLAN " + Nplan.getProperty("planname").toString()+" AT THE REGION "+Nregion.getProperty("region").toString());
            }
            transaction.success();
        }
    }

    /**
     * Deletes the nodes and relationships created for the last loaded row.
     * Does nothing when no row has been loaded yet.
     */
    void removeData() {
        if (NphNum == null || Nplan == null || Nregion == null) {
            return;
        }
        try (Transaction transaction = graphDataService.beginTx()) {
            // Relationships must be deleted before the nodes they connect;
            // a node that still has relationships cannot be committed as deleted.
            NphNum.getSingleRelationship(RelTypes.PLN_STATUS, Direction.OUTGOING).delete();
            Nplan.getSingleRelationship(RelTypes.BELONGS_TO, Direction.OUTGOING).delete();
            NphNum.delete();
            Nplan.delete();
            Nregion.delete();
            System.out.println("Nodes are Removed Successfully");
            transaction.success();
        }
    }

    /** Shuts the embedded database down if it was opened. */
    void shutdown() {
        if (graphDataService == null) {
            return;
        }
        graphDataService.shutdown();
        System.out.println("Neo4j DB is shutdown successfully");
    }
}
pom.xml:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Coordinates of this sample project. -->
    <!-- NOTE(review): groupId/artifactId are the same as the hive-jdbc dependency below; consider renaming the project. -->
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-jdbc</artifactId>
    <version>0.13.0</version>
    <packaging>jar</packaging>

    <name>hive-jdbc</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- Hive JDBC driver (org.apache.hive.jdbc.HiveDriver) -->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>1.1.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-core</artifactId>
            <version>1.2.1</version>
        </dependency>
        <!-- Neo4j dependencies -->
        <dependency>
            <groupId>org.neo4j</groupId>
            <artifactId>neo4j</artifactId>
            <version>2.1.7</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Compile for Java 7 -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>2.3.2</version>
                <configuration>
                    <source>1.7</source>
                    <target>1.7</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

Results:

The console output of the program is shown below. The same data can also be viewed in the Neo4j browser at http://localhost:7474 if you have already installed the Neo4j Server.

cd P:\Workspace\hive-jdbc; "JAVA_HOME=C:\\Program Files\\Java\\jdk1.7.0_71" M2_HOME=C:\\apache-maven-3.2.5 cmd /c "\"\"C:\\apache-maven-3.2.5\\bin\\mvn.bat\" -Dexec.args=\"-classpath %classpath org.apache.hive.jdbc.NodeCreationNeo4j\" -Dexec.executable=\"C:\\Program Files\\Java\\jdk1.7.0_71\\bin\\java.exe\" -DskipTests=true -Dmaven.ext.class.path=\"C:\\Program Files\\NetBeans 8.0.2\\java\\maven-nblib\\netbeans-eventspy.jar\" -Dfile.encoding=UTF-8 org.codehaus.mojo:exec-maven-plugin:1.2.1:exec\""

Running NetBeans Compile On Save execution. Phase execution is skipped and output directories of dependency projects (with Compile on Save turned on) will be used instead of their jar artifacts.
Scanning for projects...
 
------------------------------------------------------------------------
Building hive-jdbc 0.13.0
------------------------------------------------------------------------
--- exec-maven-plugin:1.2.1:exec (default-cli) @ hive-jdbc ---
15/04/06 23:30:26 INFO jdbc.Utils: Supplied authorities: localhost:10000
15/04/06 23:30:26 INFO jdbc.Utils: Resolved authority: localhost:10000
15/04/06 23:30:28 INFO jdbc.HiveConnection: Will try to open client transport with JDBC Uri: jdbc:hive2://localhost:10000/xademo
After Connectionorg.apache.hive.jdbc.HiveConnection@48dd639c
PHONE_NUM IS IN THE PLAN PLAN AT THE REGION REGION
5553947406 IS IN THE PLAN 6290 AT THE REGION R06
7622112093 IS IN THE PLAN 2316 AT THE REGION R02
5092111043 IS IN THE PLAN 6389 AT THE REGION R06
9392254909 IS IN THE PLAN 4002 AT THE REGION R04
7783343634 IS IN THE PLAN 2276 AT THE REGION R02
5534292073 IS IN THE PLAN 6389 AT THE REGION R06
9227087403 IS IN THE PLAN 4096 AT THE REGION R04
9226203167 IS IN THE PLAN 4060 AT THE REGION R04
9221154050 IS IN THE PLAN 4107 AT THE REGION R04
7434378689 IS IN THE PLAN 2002 AT THE REGION R02
7482285225 IS IN THE PLAN 2285 AT THE REGION R02
7788070992 IS IN THE PLAN 2002 AT THE REGION R02
7982300380 IS IN THE PLAN 2276 AT THE REGION R02
9790142194 IS IN THE PLAN 4012 AT THE REGION R04
9226907642 IS IN THE PLAN 4060 AT THE REGION R04
9559185951 IS IN THE PLAN 4276 AT THE REGION R04
7582299877 IS IN THE PLAN 2389 AT THE REGION R02
9422182637 IS IN THE PLAN 4060 AT THE REGION R04
9291295360 IS IN THE PLAN 4324 AT THE REGION R04
Neo4j DB is shutdown successfully
------------------------------------------------------------------------
BUILD SUCCESS
------------------------------------------------------------------------
Total time: 02:09 min
Finished at: 2015-04-06T23:32:17-07:00
Final Memory: 10M/124M
------------------------------------------------------------------------

Please note that I have not explained the concepts used here, but the code should be easy to follow for anyone familiar with Java.

How to create Neo4j Graph Database Nodes and Relationship using Java API.

How to create Neo4j Graph Database Nodes and Relationship using Java API:

1. Download Neo4j Community Edition and place in a Root Folder (Ex. C:\Program Files (X86)\Neo4j Community\)
2. Create a Maven project using Eclipse (I used the Luna IDE) and add the dependency as shown below.

<dependencies>
    <dependency>
        <groupId>org.neo4j</groupId>
        <artifactId>neo4j</artifactId>
        <version>2.1.7</version>
    </dependency>
</dependencies>

Create Maven Project:

Create Maven Project

6

2

4

5

You can find the Maven dependency details at http://neo4j.com/docs/stable/tutorials-java-embedded-setup.html

3. I have created a simple node-to-node relationship in the HelloWorldMaven.java program below and explained each step in its comments. Please follow the comments inside the program.


package com.neo4j;

import org.neo4j.graphdb.*;
import org.neo4j.graphdb.factory.GraphDatabaseFactory;

/**
* Created by Varatharajan Giri Ramanathan on 3/22/2015.
*/

public class HelloWorldMaven {
//First the Neo4j DB path is specified
private static final String Neo4j_path=“/Users/gfp2ram/workspace/neo4j”;

//Creating Nodes, RelationShip, GraphDBService
Node first;
Node second;
Relationship relation;
GraphDatabaseService graphDataService;

//List of RelationShipts between the Nodes
private static enum RelTypes implements RelationshipType
{
KNOWS
}

public static void main(String[] args) {

//Creating an Instance to make calls for the functions we have written below.
HelloWorldMaven hello = new HelloWorldMaven();

//Function calls
hello.createDatabase();
hello.removeData();
hello.shutdown();

}
//Always create the Database
void createDatabase() {

//Step : 1 == > Create GraphDatabaseService
graphDataService = new GraphDatabaseFactory().newEmbeddedDatabase(Neo4j_path);

//Step : 2 == > Begin Transaction
Transaction transaction = graphDataService.beginTx();

try {
//Step : 3 == > Creation of Node and Set the Properties
//createNode(), setProperty are the method

first = graphDataService.createNode();
first.setProperty(“name”,”Jackson Hewitt”);

second = graphDataService.createNode();
second.setProperty(“name”,”H&R”);

//Step : 4 ==>; Create Relationship

relation = first.createRelationshipTo(first,RelTypes.KNOWS);
relation.setProperty(“relationship-type”,”knows”);

//Printing out the relationship between first and second nodes
//System.out.println(first.getProperty(“name”).toString());
//System.out.println(relation.getProperty(“relationship-type”).toString());
//System.out.println(second.getProperty(“name”).toString());
System.out.println(first.getProperty(“name”).toString()+”–>”+relation.getProperty(“relationship-type”).toString()+ ” — >” + second.getProperty(“name”).toString());

//Step : 5 ==> ; Success the transaction
transaction.success();
}
finally {
//Step 6: ==>; Finish Transaction
transaction.finish();
}
}

//Once the database is created, the data has to be removed
void removeData() {
//Step 1 : Again create the transaction
Transaction transaction = graphDataService.beginTx();

try {
//Delete the Outgoing RelationShip first
first.getSingleRelationship(RelTypes.KNOWS, Direction.OUTGOING).delete();
System.out.println(“Nodes are Removed Successfully”);
first.delete();
second.delete();
transaction.success();
} finally {
//Finish the Transaction
transaction.finish();
}
}

//The database instance has also be shutdown once created
void shutdown() {
//Shutdown the graphDataService
graphDataService.shutdown();
System.out.println(“Neo4j DB is shutdown successfully”);
}
}

Compile the Program and See the Output:

7