Hi there,
I am trying to run Dataflow Runner using the command below:
./dataflow-runner --log-level debug run-transient --emr-config cluster.json --emr-playbook playbook.json
cluster.json has pretty much the default settings from this setup guide: Setup - Snowplow Docs.
However, I am getting the error below:
ERRO[0000] At least one of Availability Zone and Subnet id is required
At least one of Availability Zone and Subnet id is required
cluster.json has the following content:
{
"schema":"iglu:com.snowplowanalytics.dataflowrunner/ClusterConfig/avro/1-1-0",
"data":{
"name":"dataflow-runner - snowflake transformer",
"logUri":"s3://snwplw-maxv-snowflake-storage-integration/logs/",
"region":"us-east-1",
"credentials":null,
"roles":{
"jobflow":"EMR_EC2_DefaultRole",
"service":"EMR_DefaultRole"
},
"ec2":{
"amiVersion":"5.9.0",
"keyName":"Q4WEB-TST-VA.pem",
"location":{
"vpc":{
"subnetId":"subnet-0785479c0a323daeb"
}
},
"instances":{
"master":{
"type":"m2.xlarge"
},
"core":{
"type":"m2.xlarge",
"count":1
},
"task":{
"type":"m1.medium",
"count":0,
"bid":"0.015"
}
}
},
"tags":[ ],
"bootstrapActionConfigs":[ ],
"configurations":[
{
"classification":"core-site",
"properties":{
"io.file.buffer.size":"65536"
}
},
{
"classification":"mapred-site",
"properties":{
"mapreduce.user.classpath.first":"true"
}
},
{
"classification":"yarn-site",
"properties":{
"yarn.resourcemanager.am.max-attempts":"1"
}
},
{
"classification":"spark",
"properties":{
"maximizeResourceAllocation":"true"
}
}
],
"applications":[ "Hadoop", "Spark" ]
}
}
I am not sure why I am getting this error.
Thanks