-
Notifications
You must be signed in to change notification settings - Fork 9.1k
Expand file tree
/
Copy path: inventory-export-hdfs-rcfile.sql
More file actions
71 lines (62 loc) · 2.2 KB
/
inventory-export-hdfs-rcfile.sql
File metadata and controls
71 lines (62 loc) · 2.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
USE sales;
GO
-- Enable the instance-level option that allows INSERT (export) against an
-- external table defined on a HADOOP data source. Guarded so the script is
-- idempotent and RECONFIGURE only runs when the value actually changes.
--
DECLARE @config_option nvarchar(100) = 'allow polybase export';
IF NOT EXISTS(SELECT * FROM sys.configurations WHERE name = @config_option AND value_in_use = 1)
BEGIN
    EXECUTE sp_configure @config_option, 1;
    RECONFIGURE WITH OVERRIDE;
END;
GO
-- Create a data source for HDFS inside the SQL big data cluster using the
-- HADOOP type. The HADOOP data source type was introduced in SQL Server 2016
-- to query data in Hadoop clusters; it relies on Java Hadoop client libraries
-- and Map/Reduce for query execution. RESOURCE_MANAGER_LOCATION enables
-- predicate pushdown to the cluster's YARN resource manager.
--
IF NOT EXISTS(SELECT * FROM sys.external_data_sources WHERE name = 'HadoopData')
    CREATE EXTERNAL DATA SOURCE HadoopData
    WITH(
        TYPE = HADOOP,
        LOCATION = 'hdfs://nmnode-0-svc:9000/',
        RESOURCE_MANAGER_LOCATION = 'sparkhead-svc:8032'
    );
GO
-- Create a file format for RCFILE with the Hive ColumnarSerDe and default
-- codec compression.
-- NOTE: the existence check must use the same name as the created object
-- ('rcfile'); checking for 'RCFILE' breaks idempotency under a
-- case-sensitive catalog collation.
--
IF NOT EXISTS(SELECT * FROM sys.external_file_formats WHERE name = 'rcfile')
    CREATE EXTERNAL FILE FORMAT rcfile
    WITH (
        FORMAT_TYPE = RCFILE,
        SERDE_METHOD = 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe',
        DATA_COMPRESSION = 'org.apache.hadoop.io.compress.DefaultCodec'
    );
GO
-- Create an external table over the HDFS data source using the HADOOP type in
-- a SQL Server 2019 big data cluster. This is the existing PolyBase v1 syntax,
-- pointing LOCATION at a directory under the HDFS namenode.
--
IF NOT EXISTS(SELECT * FROM sys.external_tables WHERE name = 'inventory_hdfs_rcfile')
    CREATE EXTERNAL TABLE [inventory_hdfs_rcfile]
    (
        [inv_date_sk] BIGINT,
        [inv_item_sk] BIGINT,
        [inv_warehouse_sk] BIGINT,
        [inv_quantity_on_hand] BIGINT
    )
    WITH
    (
        DATA_SOURCE = HadoopData,
        LOCATION = '/inventory_rcfile',
        FILE_FORMAT = rcfile
    );
GO
-- Export the SQL Server table to HDFS by inserting through the external table.
--
INSERT INTO inventory_hdfs_rcfile
SELECT [inv_date_sk], [inv_item_sk], [inv_warehouse_sk], [inv_quantity_on_hand]
FROM inventory;
GO
-- Query the exported data back through the external table as a sanity check.
--
SELECT COUNT(*) FROM inventory_hdfs_rcfile;
GO
-- Cleanup: drop the external table (uncomment to run).
--
/*
DROP EXTERNAL TABLE inventory_hdfs_rcfile;
*/