Commit c4a9a30

Add files via upload
1 parent 19957d4 commit c4a9a30

File tree

3 files changed: +208 -0 lines changed

MySQLoHiveDataIngestion.java

Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.List;
import java.util.Properties;

import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.hive.HiveContext;

public class MySQLoHiveDataIngestion {

    private static final Logger logger = Logger.getLogger(MySQLoHiveDataIngestion.class.getName());
    private static Connection connection = null;

    public static void main(String[] args) throws ClassNotFoundException, SQLException {
        logger.info("Calling main method");

        // The master URL is supplied by spark-submit, so only the application name is set here
        logger.info("Creating JavaSparkContext and HiveContext");
        SparkConf sparkConf = new SparkConf().setAppName("MySQLoHiveDataIngestion");
        JavaSparkContext javaSparkContext = new JavaSparkContext(sparkConf);
        HiveContext hiveContext = new HiveContext(javaSparkContext);

        // JDBC connection properties for the source MySQL database
        Properties props = new Properties();
        props.setProperty("user", "root");
        props.setProperty("password", "root");

        // Read the MySQL table into a Spark DataFrame over JDBC
        DataFrame result = hiveContext.read().jdbc("jdbc:mysql://localhost:3306/hadoop", "transaction", props);
        result.printSchema();

        // Derive the Hive DDL from the MySQL table metadata and create the target table
        connection = getConnection();
        String createTable = MySQLoHiveDataIngestion.getHiveTableQuery();
        hiveContext.sql(createTable);

        // Collect the source rows on the driver (only reasonable for small tables)
        List<Row> rows = result.collectAsList();
        logger.info("Rows read from MySQL: " + rows.size());
    }

    // Opens a plain JDBC connection to the source MySQL database
    public static Connection getConnection() throws ClassNotFoundException, SQLException {
        Class.forName("com.mysql.jdbc.Driver");
        return DriverManager.getConnection("jdbc:mysql://localhost:3306/hadoop", "root", "root");
    }

    // Builds a Hive CREATE TABLE statement from the MySQL table's ResultSet metadata
    public static String getHiveTableQuery() throws SQLException {
        Statement st = connection.createStatement();
        ResultSet rs = st.executeQuery("SELECT * FROM transaction");
        ResultSetMetaData rsMetaData = rs.getMetaData();

        StringBuilder createTable = new StringBuilder("CREATE TABLE IF NOT EXISTS transaction (");
        for (int i = 1; i <= rsMetaData.getColumnCount(); i++) {
            logger.info("Column Name " + rsMetaData.getColumnName(i));
            logger.info("Column DataType " + rsMetaData.getColumnTypeName(i));
            if (i > 1) {
                createTable.append(", ");
            }
            createTable.append(rsMetaData.getColumnName(i))
                    .append(" ")
                    .append(getHiveDataType(rsMetaData.getColumnTypeName(i)));
        }
        createTable.append(") STORED AS PARQUET");

        logger.info("Create Table Statement : " + createTable);
        connection.close();
        return createTable.toString();
    }

    // Maps a JDBC/MySQL column type name to the corresponding Hive data type
    public static String getHiveDataType(String dataType) {
        if (dataType.equalsIgnoreCase("STRING") || dataType.equalsIgnoreCase("CHAR")
                || dataType.equalsIgnoreCase("VARCHAR2") || dataType.equalsIgnoreCase("VARCHAR")) {
            return "STRING";
        } else if (dataType.equalsIgnoreCase("INT")) {
            return "INT";
        } else if (dataType.equalsIgnoreCase("LONG") || dataType.equalsIgnoreCase("BIGINT")) {
            return "BIGINT";
        } else if (dataType.equalsIgnoreCase("FLOAT")) {
            return "FLOAT";
        } else if (dataType.equalsIgnoreCase("DOUBLE")) {
            return "DOUBLE";
        } else if (dataType.equalsIgnoreCase("BOOLEAN")) {
            return "TINYINT";
        } else if (dataType.equalsIgnoreCase("BYTE")) {
            return "SMALLINT";
        } else if (dataType.equalsIgnoreCase("DECIMAL")) {
            return "DECIMAL";
        } else if (dataType.equalsIgnoreCase("DATE")) {
            return "DATE";
        }
        // Fall back to STRING for any type not mapped above so the generated DDL stays valid
        logger.warn("Unmapped source type '" + dataType + "', defaulting to STRING");
        return "STRING";
    }
}
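
Note that the class builds and runs the CREATE TABLE statement, but the rows read from MySQL are only collected on the driver and never written into the new Hive table. A minimal sketch of that final step, assuming the Spark 1.6 DataFrameWriter API and the result DataFrame from main above, could be appended at the end of main:

// Hypothetical last step in main(): append the MySQL rows into the Hive table created above.
// insertInto matches columns by position, so the JDBC schema must line up with the generated DDL.
result.write()
      .mode(org.apache.spark.sql.SaveMode.Append)
      .insertInto("transaction");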

Spark Submit

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
# Local mode: the MySQL JDBC driver is shipped with --jars
spark-submit --master local[*] --jars /usr/share/java/mysql-connector-java.jar --class MySQLoHiveDataIngestion MySQLToHive-0.0.1-SNAPSHOT.jar

# YARN cluster mode
spark-submit --master yarn \
--deploy-mode cluster \
--driver-memory 4g \
--executor-memory 2g \
--executor-cores 4 \
--queue default \
--class MySQLoHiveDataIngestion MySQLToHive-0.0.1-SNAPSHOT.jar

# Point spark-submit at the Hadoop client configuration before using --master yarn
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
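
In yarn cluster mode the driver also runs inside the cluster, so the MySQL connector jar has to be shipped with the application just as in the local example. Assuming the same connector path as above, the --jars flag can simply be added to the YARN command:

spark-submit --master yarn --deploy-mode cluster \
--jars /usr/share/java/mysql-connector-java.jar \
--class MySQLoHiveDataIngestion MySQLToHive-0.0.1-SNAPSHOT.jar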

pom.xml

Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>MySQLToHive</groupId>
  <artifactId>MySQLToHive</artifactId>
  <version>0.0.1-SNAPSHOT</version>

  <dependencies>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.11</artifactId>
      <version>1.6.2</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_2.11</artifactId>
      <version>1.6.2</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-hive -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-hive_2.11</artifactId>
      <version>1.6.2</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.38</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.hive/hive-jdbc -->
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-jdbc</artifactId>
      <version>1.2.2</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/log4j/log4j -->
    <dependency>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
      <version>1.2.17</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/commons-lang/commons-lang -->
    <dependency>
      <groupId>commons-lang</groupId>
      <artifactId>commons-lang</artifactId>
      <version>2.6</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/com.databricks/spark-avro -->
    <!-- Scala 2.11 build to match the spark-core/sql/hive artifacts above -->
    <dependency>
      <groupId>com.databricks</groupId>
      <artifactId>spark-avro_2.11</artifactId>
      <version>2.0.1</version>
    </dependency>
  </dependencies>
</project>
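
The jar passed to spark-submit above is the artifact this pom produces with the default jar packaging; assuming the cluster already provides the Spark and Hive classes, only the MySQL connector has to be supplied separately via --jars, so a plain Maven build is enough:

mvn clean package
# produces target/MySQLToHive-0.0.1-SNAPSHOT.jar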
