Connection

Configuration

| NAME | TYPE | DESCRIPTION | REQUIRED | READONLY |
| --- | --- | --- | --- | --- |
| server_hostname | string | | true | false |
| port | integer | | true | false |
| access_token | string | | true | false |
| http_path | string | | true | false |
| cloud_provider | string | Accepted Values: aws, azure | false | false |
| auth_mode | string | How to authenticate with AWS. Defaults to Access Key and Secret. Accepted Values: access_key_and_secret, iam_role | true | false |
| iam_role_arn | string | (required if auth_mode is “iam_role”) | false | false |
| storage_credential_name | string | | false | false |
| external_id | string | External ID for the IAM role | false | false |
| aws_access_key_id | string | See https://docs.polytomic.com/docs/databricks-connections#writing-to-databricks (required if auth_mode is “access_key_and_secret”) | false | false |
| aws_secret_access_key | string | (required if auth_mode is “access_key_and_secret”) | false | false |
| aws_user | string | | false | false |
| s3_bucket_name | string | Name of bucket used for staging data load files (required if cloud_provider is “aws”) | false | false |
| s3_bucket_region | string | (required if cloud_provider is “aws”) | false | false |
| azure_account_name | string | The account name of the storage account (required if cloud_provider is “azure”) | false | false |
| azure_access_key | string | The access key associated with this storage account (required if cloud_provider is “azure”) | false | false |
| container_name | string | The container which we will stage files in (required if cloud_provider is “azure”) | false | false |
| unity_catalog_enabled | boolean | | false | false |
| enable_delta_uniform | boolean | | false | false |
| enforce_query_limit | boolean | | false | false |
| concurrent_queries | integer | | false | false |

Example

{
  "name": "databricks connection",
  "type": "databricks",
  "configuration": {
    "access_token": "isoz8af6zvp8067gu68gvrp0oftevn",
    "auth_mode": "access_key_and_secret",
    "aws_access_key_id": "AKIAIOSFODNN7EXAMPLE",
    "aws_secret_access_key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
    "aws_user": "",
    "azure_access_key": "abcdefghijklmnopqrstuvwxyz0123456789/+ABCDEabcdefghijklmnopqrstuvwxyz0123456789/+ABCDE==",
    "azure_account_name": "account",
    "cloud_provider": "aws",
    "concurrent_queries": 0,
    "container_name": "container",
    "enable_delta_uniform": false,
    "enforce_query_limit": false,
    "external_id": "",
    "http_path": "/sql",
    "iam_role_arn": "",
    "port": 443,
    "s3_bucket_name": "s3://polytomic-databricks-results/customer-dataset",
    "s3_bucket_region": "",
    "server_hostname": "dbc-1234dsafas-d0001.cloud.databricks.com",
    "storage_credential_name": "",
    "unity_catalog_enabled": false
  }
}

Model Sync

Source

Configuration

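| NAME | TYPE | DESCRIPTION | REQUIRED | READONLY |
| --- | --- | --- | --- | --- |
| catalog | string | Catalog | false | false |
| schema | string | Schema | false | false |
| table | string | Table | false | false |
| query | string | Query | false | false |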

Example

{
  ...
  "configuration": {
    "catalog": "samples",
    "query": "SELECT * FROM samples.nyctaxi.trips",
    "schema": "nyctaxi",
    "table": "trips"
  }
}

Target

Configuration

| NAME | TYPE | DESCRIPTION | REQUIRED | READONLY |
| --- | --- | --- | --- | --- |
| preserve_table_on_resync | boolean | Preserve destination table when resyncing | false | false |

Example

{
  ...
  "target": {
    "configuration": {
      "preserve_table_on_resync": false
    }
  }
}

Bulk Sync

Destination

Configuration

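| NAME | TYPE | DESCRIPTION | REQUIRED | READONLY |
| --- | --- | --- | --- | --- |
| advanced | object | | false | false |
| external_location_name | string | External location | false | false |
| catalog | string | Catalog | false | false |
| schema | string | Output schema | false | false |
| mirror_schemas | boolean | Mirror schemas | false | false |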

Example

{
  ...
  "destination_configuration": {
    "advanced": {
      "hard_deletes": false,
      "table_prefix": "",
      "truncate_existing": false
    },
    "catalog": "samples",
    "external_location_name": "",
    "mirror_schemas": false,
    "schema": "nyctaxi"
  }
}

Type handling

Destination types

| POLYTOMIC TYPE | DATABRICKS TYPE |
| --- | --- |
| array<> | ARRAY<> |
| bigint | BIGINT |
| boolean | BOOLEAN |
| date | DATE |
| datetime | TIMESTAMP |
| decimal(precision, scale) | DECIMAL(precision,scale) |
| double | DOUBLE |
| int | INT |
| json | STRING |
| jsonarray | STRING |
| number | DECIMAL(38,18) |
| object{} | STRUCT<> |
| single | FLOAT |
| smallint | SMALLINT |
| string | STRING |
| time | TIMESTAMP |

Source types

| DATABRICKS TYPE | POLYTOMIC TYPE |
| --- | --- |
| ARRAY<> | array<> |
| BIGINT | bigint |
| DATE | date |
| DECIMAL(precision, scale) | decimal(precision, scale) |
| DOUBLE | double |
| FLOAT | single |
| INT | int |
| INTERVAL | string |
| MAP<> | object{} |
| SMALLINT | smallint |
| STRUCT<> | object{} |
| TIMESTAMP | datetime |
| TIMESTAMP_NTZ | datetime |
| TINYINT | smallint |
| VARCHAR | string |