The cloud environment does not provide the system console that the labeller needs in order to work. Zingg is therefore run as a Spark Submit job, along with a Python notebook-based labeller created specifically to run within the Databricks cloud.
{
"settings": {
"new_cluster": {
"spark_version": "7.3.x-scala2.12",
"spark_conf": {
"spark.databricks.cluster.profile": "singleNode",
"spark.master": "local[*, 4]"
"aws_attributes": {
"availability": "SPOT_WITH_FALLBACK",
"first_on_demand": 1,
"zone_id": "us-west-2a"
"node_type_id": "c5d.xlarge",
"driver_node_type_id": "c5d.xlarge",
"custom_tags": {
"ResourceClass": "SingleNode"
"spark_submit_task": {
"parameters": [
"email_notifications": {},
"name": "test",
"max_concurrent_runs": 1
The config file for Databricks needs modifications to accept DBFS locations. Here is a sample config that worked:
{
"fieldName" : "fname",
"matchType" : "email",
"fields" : "fname",
"dataType": "\"string\""
"fieldName" : "lname",
"matchType" : "fuzzy",
"fields" : "lname",
"dataType": "\"string\""
"fieldName" : "stNo",
"matchType": "fuzzy",
"fields" : "stNo",
"dataType": "\"string\""
"fieldName" : "add1",
"matchType": "fuzzy",
"fields" : "add1",
"dataType": "\"string\""
"fieldName" : "add2",
"matchType": "fuzzy",
"fields" : "add2",
"dataType": "\"string\""
"fieldName" : "city",
"matchType": "fuzzy",
"fields" : "city",
"dataType": "\"string\""
"fieldName" : "state",
"matchType": "fuzzy",
"fields" : "state",
"dataType": "\"string\""
"fieldName" : "dob",
"matchType": "fuzzy",
"fields" : "dob",
"dataType": "\"string\""
"fieldName" : "ssn",
"matchType": "fuzzy",
"fields" : "ssn",
"dataType": "\"string\""
"output" : [{
"props": {
"location": "/febrl120/zinggOutput",
"delimiter": ",",
"data" : [{
"props": {
"location": "/FileStore/test.csv",
"delimiter": ",",
"{\"type\" : \"struct\",
\"fields\" : [
{\"name\":\"id\", \"type\":\"string\", \"nullable\":false},
{\"name\":\"fname\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"lname\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"stNo\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add1\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"add2\",\"type\":\"string\",\"nullable\":true} ,
{\"name\":\"city\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"state\", \"type\":\"string\", \"nullable\":true},
{\"name\":\"dob\",\"type\":\"string\",\"nullable\":true} ,
"labelDataSampleSize" : 0.1,
"modelId": 101,
"zinggDir": "/models"