| title | PolyBase Transact-SQL reference | Microsoft Docs | |||
|---|---|---|---|---|
| ms.custom | ||||
| ms.date | 09/24/2018 | |||
| ms.prod | sql | |||
| ms.reviewer | ||||
| ms.technology | polybase | |||
| ms.topic | reference | |||
| helpviewer_keywords |
|
|||
| author | rothja | |||
| ms.author | jroth | |||
| manager | craigg |
[!INCLUDEappliesto-ss-xxxx-asdw-pdw-md-winonly]
To use PolyBase, you must create external tables to reference your external data.
CREATE DATABASE SCOPED CREDENTIAL (Transact-SQL)
CREATE EXTERNAL DATA SOURCE (Transact-SQL)
CREATE EXTERNAL FILE FORMAT (Transact-SQL)
CREATE EXTERNAL TABLE (Transact-SQL)
CREATE STATISTICS (Transact-SQL)
Note
PolyBase in SQL Server 2016 only supports Windows users. If you try to use a SQL user to query a PolyBase external table, the query will fail.
Configure PolyBase. See PolyBase configuration.
Applies to: SQL Server (starting with 2016), Parallel Data Warehouse
1. Create Database Scoped Credential
This step is required only for Kerberos-secured Hadoop clusters.
-- Create a master key on the database.
-- Required to encrypt the credential secret.
CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'S0me!nfo';
-- Create a database scoped credential for Kerberos-secured Hadoop clusters.
-- IDENTITY: the Kerberos user name.
-- SECRET: the Kerberos password
CREATE DATABASE SCOPED CREDENTIAL HadoopUser1
WITH IDENTITY = '<hadoop_user_name>', Secret = '<hadoop_password>'; 2. Create External Data Source
-- Create an external data source.
-- LOCATION (Required) : Hadoop Name Node IP address and port.
-- RESOURCE MANAGER LOCATION (Optional): Hadoop Resource Manager location to enable pushdown computation.
-- CREDENTIAL (Optional): the database scoped credential, created above.
CREATE EXTERNAL DATA SOURCE MyHadoopCluster WITH (
TYPE = HADOOP,
LOCATION ='hdfs://10.xxx.xx.xxx:xxxx',
RESOURCE_MANAGER_LOCATION = '10.xxx.xx.xxx:xxxx',
CREDENTIAL = HadoopUser1
); 3. Create External File Format
-- Create an external file format.
-- FORMAT TYPE: Type of format in Hadoop (DELIMITEDTEXT, RCFILE, ORC, PARQUET).
CREATE EXTERNAL FILE FORMAT TextFileFormat WITH (
FORMAT_TYPE = DELIMITEDTEXT,
FORMAT_OPTIONS (FIELD_TERMINATOR ='|',
USE_TYPE_DEFAULT = TRUE)
4. Create External Table
-- Create an external table pointing to data stored in Hadoop.
-- LOCATION: path to file or directory that contains the data (relative to HDFS root).
CREATE EXTERNAL TABLE [dbo].[CarSensor_Data] (
[SensorKey] int NOT NULL,
[CustomerKey] int NOT NULL,
[GeographyKey] int NULL,
[Speed] float NOT NULL,
[YearMeasured] int NOT NULL
)
WITH (LOCATION='/Demo/',
DATA_SOURCE = MyHadoopCluster,
FILE_FORMAT = TextFileFormat
); 5. Create Statistics
-- Create statistics on an external table.
CREATE STATISTICS StatsForSensors on CarSensor_Data(CustomerKey, Speed) Applies to: SQL Server (starting with 2016), Azure SQL Data Warehouse, Parallel Data Warehouse
1. Create Database Scoped Credential
-- Create a master key on the database.
-- Required to encrypt the credential secret.
CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'S0me!nfo';
-- Create a database scoped credential for Azure blob storage.
-- IDENTITY: any string (this is not used for authentication to Azure storage).
-- SECRET: your Azure storage account key.
CREATE DATABASE SCOPED CREDENTIAL AzureStorageCredential
WITH IDENTITY = 'user', Secret = '<azure_storage_account_key>'; 2. Create External Data Source
-- Create an external data source.
-- LOCATION: Azure account storage account name and blob container name.
-- CREDENTIAL: The database scoped credential created above.
CREATE EXTERNAL DATA SOURCE AzureStorage with (
TYPE = HADOOP,
LOCATION ='wasbs://<blob_container_name>@<azure_storage_account_name>.blob.core.windows.net',
CREDENTIAL = AzureStorageCredential
);
3. Create External File Format
-- Create an external file format.
-- FORMAT TYPE: Type of format in Hadoop (DELIMITEDTEXT, RCFILE, ORC, PARQUET).
CREATE EXTERNAL FILE FORMAT TextFileFormat WITH (
FORMAT_TYPE = DELIMITEDTEXT,
FORMAT_OPTIONS (FIELD_TERMINATOR ='|',
USE_TYPE_DEFAULT = TRUE)
4. Create External Table
-- Create an external table pointing to data stored in Azure storage.
-- LOCATION: path to a file or directory that contains the data (relative to the blob container).
-- To point to all files under the blob container, use LOCATION='/'
CREATE EXTERNAL TABLE [dbo].[CarSensor_Data] (
[SensorKey] int NOT NULL,
[CustomerKey] int NOT NULL,
[GeographyKey] int NULL,
[Speed] float NOT NULL,
[YearMeasured] int NOT NULL
)
WITH (LOCATION='/Demo/',
DATA_SOURCE = AzureStorage,
FILE_FORMAT = TextFileFormat
); 5. Create Statistics
-- Create statistics on an external table.
CREATE STATISTICS StatsForSensors on CarSensor_Data(CustomerKey, Speed)
Applies to: Azure SQL Data Warehouse
For more information, see Load with Azure Data Lake Store
1. Create Database Scoped Credential
-- Create a Database Master Key.
-- Only necessary if one does not already exist.
-- Required to encrypt the credential secret in the next step.
CREATE MASTER KEY;
-- Create a database scoped credential
-- IDENTITY: Pass the client id and OAuth 2.0 Token Endpoint taken from your Azure Active Directory Application
-- SECRET: Provide your AAD Application Service Principal key.
-- For more information on Create Database Scoped Credential: https://msdn.microsoft.com/library/mt270260.aspx
CREATE DATABASE SCOPED CREDENTIAL ADL_User
WITH
IDENTITY = '<client_id>@<OAuth_2.0_Token_EndPoint>'
,SECRET = '<key>'
;2. Create External Data Source
-- TYPE: HADOOP - PolyBase uses Hadoop APIs to access data in Azure Data Lake Store.
-- LOCATION: Provide Azure storage account name and blob container name.
-- CREDENTIAL: Provide the credential created in the previous step.
CREATE EXTERNAL DATA SOURCE AzureDataLakeStore
WITH (
TYPE = HADOOP,
LOCATION = 'adl://<AzureDataLake account_name>.azuredatalake.net,
CREDENTIAL = AzureStorageCredential
);3. Create External File Format
-- FIELD_TERMINATOR: Marks the end of each field (column) in a delimited text file
-- STRING_DELIMITER: Specifies the field terminator for data of type string in the text-delimited file.
-- DATE_FORMAT: Specifies a custom format for all date and time data that might appear in a delimited text file.
-- Use_Type_Default: Store all Missing values as NULL
CREATE EXTERNAL FILE FORMAT TextFileFormat
WITH
( FORMAT_TYPE = DELIMITEDTEXT
, FORMAT_OPTIONS ( FIELD_TERMINATOR = '|'
, STRING_DELIMITER = ''
, DATE_FORMAT = 'yyyy-MM-dd HH:mm:ss.fff'
, USE_TYPE_DEFAULT = FALSE
)
);4. Create External Table
-- LOCATION: Folder under the ADLS root folder.
-- DATA_SOURCE: Specifies which Data Source Object to use.
-- FILE_FORMAT: Specifies which File Format Object to use
-- REJECT_TYPE: Specifies how you want to deal with rejected rows. Either Value or percentage of the total
-- REJECT_VALUE: Sets the Reject value based on the reject type.
-- DimProduct
CREATE EXTERNAL TABLE [dbo].[DimProduct_external] (
[ProductKey] [int] NOT NULL,
[ProductLabel] [nvarchar](255) NULL,
[ProductName] [nvarchar](500) NULL
)
WITH
(
LOCATION='/DimProduct/'
, DATA_SOURCE = AzureDataLakeStore
, FILE_FORMAT = TextFileFormat
, REJECT_TYPE = VALUE
, REJECT_VALUE = 0
)
;5. Create Statistics
CREATE STATISTICS StatsForProduct on DimProduct_external(ProductKey) For examples of queries, see PolyBase Queries.