| title | PolyBase Transact-SQL reference | Microsoft Docs | |||
|---|---|---|---|---|
| description | To use PolyBase, create external tables for your external data in Hadoop, Azure blob storage, Azure Data Lake Store, SQL Server, Oracle, Teradata, and MongoDB. | |||
| ms.date | 09/24/2018 | |||
| ms.prod | sql | |||
| ms.technology | polybase | |||
| ms.topic | reference | |||
| helpviewer_keywords |
|
|||
| author | MikeRayMSFT | |||
| ms.author | mikeray | |||
| ms.reviewer | ||||
| monikerRange | >= sql-server-linux-ver15 || >= sql-server-2016 || =sqlallproducts-allversions |
[!INCLUDEappliesto-ss-xxxx-asdw-pdw-md]
To use PolyBase, you must create external tables to reference your external data.
CREATE DATABASE SCOPED CREDENTIAL (Transact-SQL)
CREATE EXTERNAL DATA SOURCE (Transact-SQL)
CREATE EXTERNAL FILE FORMAT (Transact-SQL)
CREATE EXTERNAL TABLE (Transact-SQL)
CREATE STATISTICS (Transact-SQL)
Note
In order to use PolyBase you must have sysadmin or CONTROL SERVER level permissions on the database.
Configure PolyBase. See PolyBase configuration.
Applies to: SQL Server (starting with 2016), Parallel Data Warehouse
1. Create Database Scoped Credential
This step is required only for Kerberos-secured Hadoop clusters.
-- Create a master key on the database.
-- Required to encrypt the credential secret.
CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'S0me!nfo';
-- Create a database scoped credential for Kerberos-secured Hadoop clusters.
-- IDENTITY: the Kerberos user name.
-- SECRET: the Kerberos password
CREATE DATABASE SCOPED CREDENTIAL HadoopUser1
WITH IDENTITY = '<hadoop_user_name>', Secret = '<hadoop_password>'; 2. Create External Data Source
-- Create an external data source.
-- LOCATION (Required) : Hadoop Name Node IP address and port.
-- RESOURCE MANAGER LOCATION (Optional): Hadoop Resource Manager location to enable pushdown computation.
-- CREDENTIAL (Optional): the database scoped credential, created above.
CREATE EXTERNAL DATA SOURCE MyHadoopCluster WITH (
TYPE = HADOOP,
LOCATION ='hdfs://10.xxx.xx.xxx:xxxx',
RESOURCE_MANAGER_LOCATION = '10.xxx.xx.xxx:xxxx',
CREDENTIAL = HadoopUser1
); 3. Create External File Format
-- Create an external file format.
-- FORMAT TYPE: Type of format in Hadoop (DELIMITEDTEXT, RCFILE, ORC, PARQUET).
CREATE EXTERNAL FILE FORMAT TextFileFormat WITH (
FORMAT_TYPE = DELIMITEDTEXT,
FORMAT_OPTIONS (FIELD_TERMINATOR ='|',
USE_TYPE_DEFAULT = TRUE)
4. Create External Table
-- Create an external table pointing to data stored in Hadoop.
-- LOCATION: path to file or directory that contains the data (relative to HDFS root).
CREATE EXTERNAL TABLE [dbo].[CarSensor_Data] (
[SensorKey] int NOT NULL,
[CustomerKey] int NOT NULL,
[GeographyKey] int NULL,
[Speed] float NOT NULL,
[YearMeasured] int NOT NULL
)
WITH (LOCATION='/Demo/',
DATA_SOURCE = MyHadoopCluster,
FILE_FORMAT = TextFileFormat
); 5. Create Statistics
-- Create statistics on an external table.
CREATE STATISTICS StatsForSensors on CarSensor_Data(CustomerKey, Speed) Applies to: SQL Server (starting with 2016), Azure SQL Data Warehouse, Parallel Data Warehouse
1. Create Database Scoped Credential
-- Create a master key on the database.
-- Required to encrypt the credential secret.
CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'S0me!nfo';
-- Create a database scoped credential for Azure blob storage.
-- IDENTITY: any string (this is not used for authentication to Azure storage).
-- SECRET: your Azure storage account key.
CREATE DATABASE SCOPED CREDENTIAL AzureStorageCredential
WITH IDENTITY = 'user', Secret = '<azure_storage_account_key>'; 2. Create External Data Source
-- Create an external data source.
-- LOCATION: Azure account storage account name and blob container name.
-- CREDENTIAL: The database scoped credential created above.
CREATE EXTERNAL DATA SOURCE AzureStorage with (
TYPE = BLOB_STORAGE,
LOCATION ='wasbs://<blob_container_name>@<azure_storage_account_name>.blob.core.windows.net',
CREDENTIAL = AzureStorageCredential
);
3. Create External File Format
-- Create an external file format.
-- FORMAT TYPE: Type of format in Hadoop (DELIMITEDTEXT, RCFILE, ORC, PARQUET).
CREATE EXTERNAL FILE FORMAT TextFileFormat WITH (
FORMAT_TYPE = DELIMITEDTEXT,
FORMAT_OPTIONS (FIELD_TERMINATOR ='|',
USE_TYPE_DEFAULT = TRUE)
4. Create External Table
-- Create an external table pointing to data stored in Azure storage.
-- LOCATION: path to a file or directory that contains the data (relative to the blob container).
-- To point to all files under the blob container, use LOCATION='/'
CREATE EXTERNAL TABLE [dbo].[CarSensor_Data] (
[SensorKey] int NOT NULL,
[CustomerKey] int NOT NULL,
[GeographyKey] int NULL,
[Speed] float NOT NULL,
[YearMeasured] int NOT NULL
)
WITH (LOCATION='/Demo/',
DATA_SOURCE = AzureStorage,
FILE_FORMAT = TextFileFormat
); 5. Create Statistics
-- Create statistics on an external table.
CREATE STATISTICS StatsForSensors on CarSensor_Data(CustomerKey, Speed)
Applies to: Azure SQL Data Warehouse
For more information, see Load with Azure Data Lake Store
1. Create Database Scoped Credential
-- Create a Database Master Key.
-- Only necessary if one does not already exist.
-- Required to encrypt the credential secret in the next step.
CREATE MASTER KEY;
-- Create a database scoped credential
-- IDENTITY: Pass the client id and OAuth 2.0 Token Endpoint taken from your Azure Active Directory Application
-- SECRET: Provide your AAD Application Service Principal key.
-- For more information on Create Database Scoped Credential: https://msdn.microsoft.com/library/mt270260.aspx
CREATE DATABASE SCOPED CREDENTIAL ADL_User
WITH
IDENTITY = '<client_id>@<OAuth_2.0_Token_EndPoint>'
,SECRET = '<key>'
;2. Create External Data Source
-- TYPE: HADOOP - PolyBase uses Hadoop APIs to access data in Azure Data Lake Store.
-- LOCATION: Provide Azure storage account name and blob container name.
-- CREDENTIAL: Provide the credential created in the previous step.
CREATE EXTERNAL DATA SOURCE AzureDataLakeStore
WITH (
TYPE = HADOOP,
LOCATION = 'adl://<AzureDataLake account_name>.azuredatalake.net,
CREDENTIAL = AzureStorageCredential
);3. Create External File Format
-- FIELD_TERMINATOR: Marks the end of each field (column) in a delimited text file
-- STRING_DELIMITER: Specifies the field terminator for data of type string in the text-delimited file.
-- DATE_FORMAT: Specifies a custom format for all date and time data that might appear in a delimited text file.
-- Use_Type_Default: Store all Missing values as NULL
CREATE EXTERNAL FILE FORMAT TextFileFormat
WITH
( FORMAT_TYPE = DELIMITEDTEXT
, FORMAT_OPTIONS ( FIELD_TERMINATOR = '|'
, STRING_DELIMITER = ''
, DATE_FORMAT = 'yyyy-MM-dd HH:mm:ss.fff'
, USE_TYPE_DEFAULT = FALSE
)
);4. Create External Table
-- LOCATION: Folder under the ADLS root folder.
-- DATA_SOURCE: Specifies which Data Source Object to use.
-- FILE_FORMAT: Specifies which File Format Object to use
-- REJECT_TYPE: Specifies how you want to deal with rejected rows. Either Value or percentage of the total
-- REJECT_VALUE: Sets the Reject value based on the reject type.
-- DimProduct
CREATE EXTERNAL TABLE [dbo].[DimProduct_external] (
[ProductKey] [int] NOT NULL,
[ProductLabel] [nvarchar](255) NULL,
[ProductName] [nvarchar](500) NULL
)
WITH
(
LOCATION='/DimProduct/'
, DATA_SOURCE = AzureDataLakeStore
, FILE_FORMAT = TextFileFormat
, REJECT_TYPE = VALUE
, REJECT_VALUE = 0
)
;5. Create Statistics
CREATE STATISTICS StatsForProduct on DimProduct_external(ProductKey) 1. Create Database Scoped Credential
-- Create a Master Key
CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'S0me!nfo';
/* specify credentials to external data source
* IDENTITY: user name for external source.
* SECRET: password for external source.
*/
CREATE DATABASE SCOPED CREDENTIAL SqlServerCredentials
WITH IDENTITY = 'username', Secret = 'password';2. Create External Data Source
/* LOCATION: Location string should be of format '<vendor>://<server>[:<port>]'.
* PUSHDOWN: specify whether computation should be pushed down to the source. ON by default.
* CREDENTIAL: the database scoped credential, created above.
*/
CREATE EXTERNAL DATA SOURCE SQLServerInstance
WITH (
LOCATION = 'sqlserver://SqlServer',
-- PUSHDOWN = ON | OFF,
CREDENTIAL = SQLServerCredentials
);3. Create Schema
CREATE SCHEMA sqlserver;
GO4. Create External Table
/* LOCATION: sql server table/view in 'database_name.schema_name.object_name' format
* DATA_SOURCE: the external data source, created above.
*/
CREATE EXTERNAL TABLE sqlserver.customer(
C_CUSTKEY INT NOT NULL,
C_NAME VARCHAR(25) NOT NULL,
C_ADDRESS VARCHAR(40) NOT NULL,
C_NATIONKEY INT NOT NULL,
C_PHONE CHAR(15) NOT NULL,
C_ACCTBAL DECIMAL(15,2) NOT NULL,
C_MKTSEGMENT CHAR(10) NOT NULL,
C_COMMENT VARCHAR(117) NOT NULL
)
WITH (
LOCATION='tpch_10.dbo.customer',
DATA_SOURCE=SqlServerInstance
);5. Create Statistics
CREATE STATISTICS CustomerCustKeyStatistics ON sqlserver.customer (C_CUSTKEY) WITH FULLSCAN; 1. Create Database Scoped Credential
-- Create a Master Key
CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'password';
/*
* Specify credentials to external data source
* IDENTITY: user name for external source.
* SECRET: password for external source.
*/
CREATE DATABASE SCOPED CREDENTIAL credential_name
WITH IDENTITY = 'username', Secret = 'password';2. Create External Data Source
/*
* LOCATION: Location string should be of format '<vendor>://<server>[:<port>]'.
* PUSHDOWN: specify whether computation should be pushed down to the source. ON by default.
* CONNECTION_OPTIONS: Specify driver location
* CREDENTIAL: the database scoped credential, created above.
*/
CREATE EXTERNAL DATA SOURCE external_data_source_name
WITH (
LOCATION = 'oracle://<server address>[:<port>]',
-- PUSHDOWN = ON | OFF,
CREDENTIAL = credential_name)3. Create External Table
/*
* LOCATION: Oracle table/view in '<database_name>.<schema_name>.<object_name>' format
* DATA_SOURCE: the external data source, created above.
*/
CREATE EXTERNAL TABLE customers(
[O_ORDERKEY] DECIMAL(38) NOT NULL,
[O_CUSTKEY] DECIMAL(38) NOT NULL,
[O_ORDERSTATUS] CHAR COLLATE Latin1_General_BIN NOT NULL,
[O_TOTALPRICE] DECIMAL(15,2) NOT NULL,
[O_ORDERDATE] DATETIME2(0) NOT NULL,
[O_ORDERPRIORITY] CHAR(15) COLLATE Latin1_General_BIN NOT NULL,
[O_CLERK] CHAR(15) COLLATE Latin1_General_BIN NOT NULL,
[O_SHIPPRIORITY] DECIMAL(38) NOT NULL,
[O_COMMENT] VARCHAR(79) COLLATE Latin1_General_BIN NOT NULL
)
WITH (
LOCATION='customer',
DATA_SOURCE= external_data_source_name
);4. Create Statistics
CREATE STATISTICS statistics_name ON customer (C_CUSTKEY) WITH FULLSCAN; 1. Create Database Scoped Credential
-- Create a Master Key
CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'password';
/*
* Specify credentials to external data source
* IDENTITY: user name for external source.
* SECRET: password for external source.
*/
CREATE DATABASE SCOPED CREDENTIAL credential_name
WITH IDENTITY = 'username', Secret = 'password';2. Create External Data Source
/* LOCATION: Location string should be of format '<vendor>://<server>[:<port>]'.
* PUSHDOWN: specify whether computation should be pushed down to the source. ON by default.
* CONNECTION_OPTIONS: Specify driver location
* CREDENTIAL: the database scoped credential, created above.
*/
CREATE EXTERNAL DATA SOURCE external_data_source_name
WITH (
LOCATION = teradata://<server address>[:<port>],
-- PUSHDOWN = ON | OFF,
CREDENTIAL =credential_name
);
3. Create External Table
/* LOCATION: Teradata table/view in '<database_name>.<object_name>' format
* DATA_SOURCE: the external data source, created above.
*/
CREATE EXTERNAL TABLE customer(
L_ORDERKEY INT NOT NULL,
L_PARTKEY INT NOT NULL,
L_SUPPKEY INT NOT NULL,
L_LINENUMBER INT NOT NULL,
L_QUANTITY DECIMAL(15,2) NOT NULL,
L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL,
L_DISCOUNT DECIMAL(15,2) NOT NULL,
L_TAX DECIMAL(15,2) NOT NULL,
L_RETURNFLAG CHAR NOT NULL,
L_LINESTATUS CHAR NOT NULL,
L_SHIPDATE DATE NOT NULL,
L_COMMITDATE DATE NOT NULL,
L_RECEIPTDATE DATE NOT NULL,
L_SHIPINSTRUCT CHAR(25) NOT NULL,
L_SHIPMODE CHAR(10) NOT NULL,
L_COMMENT VARCHAR(44) NOT NULL
)
WITH (
LOCATION='customer',
DATA_SOURCE= external_data_source_name
);4. Create Statistics
CREATE STATISTICS statistics_name ON customer (C_CUSTKEY) WITH FULLSCAN; 1. Create Database Scoped Credential
-- Create a Master Key
CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'password';
/*
* Specify credentials to external data source
* IDENTITY: user name for external source.
* SECRET: password for external source.
*/
CREATE DATABASE SCOPED CREDENTIAL credential_name
WITH IDENTITY = 'username', Secret = 'password';2. Create External Data Source
/* LOCATION: Location string should be of format '<type>://<server>[:<port>]'.
* PUSHDOWN: specify whether computation should be pushed down to the source. ON by default.
*CONNECTION_OPTIONS: Specify driver location
* CREDENTIAL: the database scoped credential, created above.
*/
CREATE EXTERNAL DATA SOURCE external_data_source_name
WITH (
LOCATION = mongodb://<server>[:<port>],
-- PUSHDOWN = ON | OFF,
CREDENTIAL = credential_name
);3. Create External Table
/* LOCATION: MongoDB table/view in '<database_name>.<schema_name>.<object_name>' format
* DATA_SOURCE: the external data source, created above.
*/
CREATE EXTERNAL TABLE customers(
[O_ORDERKEY] DECIMAL(38) NOT NULL,
[O_CUSTKEY] DECIMAL(38) NOT NULL,
[O_ORDERSTATUS] CHAR COLLATE Latin1_General_BIN NOT NULL,
[O_TOTALPRICE] DECIMAL(15,2) NOT NULL,
[O_ORDERDATE] DATETIME2(0) NOT NULL,
[O_COMMENT] VARCHAR(79) COLLATE Latin1_General_BIN NOT NULL
)
WITH (
LOCATION='customer',
DATA_SOURCE= external_data_source_name
);4. Create Statistics
CREATE STATISTICS statistics_name ON customer (C_CUSTKEY) WITH FULLSCAN; For examples of queries, see PolyBase Queries.