Commit 0b90a88

Add files via upload
1 parent 82ebc07 commit 0b90a88

File tree

5 files changed: +148 -0 lines changed


Employee.java

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
import java.io.Serializable;

// JavaBean used by Spark SQL to infer the DataFrame schema; must be Serializable.
public class Employee implements Serializable {

    private static final long serialVersionUID = 1L;

    private String employeeName;
    private String department;
    private String city;
    private Integer salary;

    public Employee(String employeeName, String department, String city, Integer salary) {
        this.employeeName = employeeName;
        this.department = department;
        this.city = city;
        this.salary = salary;
    }

    public String getEmployeeName() {
        return employeeName;
    }

    public void setEmployeeName(String employeeName) {
        this.employeeName = employeeName;
    }

    public String getDepartment() {
        return department;
    }

    public void setDepartment(String department) {
        this.department = department;
    }

    public String getCity() {
        return city;
    }

    public void setCity(String city) {
        this.city = city;
    }

    public Integer getSalary() {
        return salary;
    }

    public void setSalary(Integer salary) {
        this.salary = salary;
    }
}

SparkSQL.java

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;

public class SparkSQL {

    // Converts a salary string such as "$40,728" into an Integer.
    private static Integer getNumericSal(String salary) {
        return Integer.parseInt(salary.replace("$", "").replace(",", "").trim());
    }

    public static void main(String[] args) {
        JavaSparkContext javaSparkContext = SparkUtility.getJavaSparkContext(SparkConstant.MASTER_LOCAL, SparkConstant.APP_NAME + "SparkSQL");
        SQLContext sqlContext = new org.apache.spark.sql.SQLContext(javaSparkContext);

        // Parse the tab-separated employee file into an RDD of Employee beans.
        JavaRDD<Employee> employee = javaSparkContext.textFile("file:///home/hduser/Files/EmpSal").map(line -> {
            String[] arr = line.split("\t");
            return new Employee(arr[0], arr[1], arr[3], getNumericSal(arr[5]));
        });

        // Infer the schema from the Employee bean and register the DataFrame as a temp table.
        DataFrame employeeDF = sqlContext.createDataFrame(employee, Employee.class);
        employeeDF.show();
        employeeDF.printSchema();
        employeeDF.registerTempTable("employee");

        DataFrame empResult = sqlContext.sql("SELECT * FROM employee WHERE salary > 40728");
        empResult.collectAsList().forEach(System.out::println);
    }
}

SparkSQLHive.java

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
public class SparkSQLHive {

    public static void main(String[] args) {
        // TODO Auto-generated method stub
    }

}

SparkSQLJSON.java

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;

public class SparkSQLJSON {

    public static void main(String[] args) {
        JavaSparkContext javaSparkContext = SparkUtility.getJavaSparkContext(SparkConstant.MASTER_LOCAL, SparkConstant.APP_NAME + "SparkSQL");
        SQLContext sqlContext = new org.apache.spark.sql.SQLContext(javaSparkContext);

        // Load the JSON file; Spark infers the schema, including the nested address struct.
        DataFrame employeeDF = sqlContext.read().json("file:///home/hduser/Files/Employee.json");

        employeeDF.show();
        employeeDF.printSchema();
        employeeDF.registerTempTable("employee");

        DataFrame empResult = sqlContext.sql("SELECT name, address.city FROM employee WHERE address.state = 'California'");
        empResult.collectAsList().forEach(System.out::println);

        // Write the DataFrame back out in Parquet format.
        employeeDF
            .write()
            .format("parquet")
            .save("file:///home/hduser/Files/Employee.parquet");
    }
}

SparkSQLParquet.java

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;

public class SparkSQLParquet {

    public static void main(String[] args) {
        JavaSparkContext javaSparkContext = SparkUtility.getJavaSparkContext(SparkConstant.MASTER_LOCAL, SparkConstant.APP_NAME + "SparkSQL");
        SQLContext sqlContext = new org.apache.spark.sql.SQLContext(javaSparkContext);

        // Read the Parquet data; the schema is stored in the file itself.
        DataFrame employeeDF = sqlContext.parquetFile("file:///home/hduser/Files/Employee.parquet/Employee.parquet");

        employeeDF.show();
        employeeDF.printSchema();
        employeeDF.registerTempTable("employee");

        DataFrame empResult = sqlContext.sql("SELECT name, address.city FROM employee WHERE address.state = 'California'");
        empResult.collectAsList().forEach(System.out::println);
    }
}
