This past Sunday and the Sunday before, we suddenly had 4 consecutive Snowflake transformer jobs fail (we load hourly, so about 8 hours of data were not loaded). We tried to resubmit these jobs, but they failed again. I went through the EMR logs in S3 but can’t find any specifics on what went wrong. The jobs failed after running for many minutes (e.g. the one I’m looking at now ran for 34 minutes). Before that, the same setup had run for months without issues.
What’s the correct way to debug / fix this? In which log files should I be able to find the error?
This is our playbook.json:
{
"schema":"iglu:com.snowplowanalytics.dataflowrunner/PlaybookConfig/avro/1-0-1",
"data":{
"region":"us-west-2",
"credentials":{
"accessKeyId": "xxxxxx",
"secretAccessKey": "xxxxxx"
},
"steps":[
{
"type":"CUSTOM_JAR",
"name":"Snowflake Transformer",
"actionOnFailure":"CANCEL_AND_WAIT",
"jar":"command-runner.jar",
"arguments":[
"spark-submit",
"--conf",
"spark.hadoop.mapreduce.job.outputformat.class=com.snowplowanalytics.snowflake.transformer.S3OutputFormat",
"--deploy-mode",
"cluster",
"--class",
"com.snowplowanalytics.snowflake.transformer.Main",
"s3://snowplow-prod-assets/snowplow-snowflake-transformer-0.6.0.jar",
"--config",
"{{base64File "./snowflake_etl.conf"}}",
"--resolver",
"{{base64File "./iglu_resolver_snowflake.json"}}",
"--inbatch-deduplication"
]
},
{
"type":"CUSTOM_JAR",
"name":"Snowflake Loader",
"actionOnFailure":"CANCEL_AND_WAIT",
"jar":"s3://snowplow-prod-assets/snowplow-snowflake-loader-0.6.0.jar",
"arguments":[
"load",
"--base64",
"--config",
"{{base64File "./snowflake_etl.conf"}}",
"--resolver",
"{{base64File "./iglu_resolver_snowflake.json"}}"
]
}
],
"tags":[ ]
}
}
And this is our snowflake_etl.conf:
{
"schema": "iglu:com.snowplowanalytics.snowplow.storage/snowflake_config/jsonschema/1-0-2",
"data": {
"name": "Snowplow Snowflake ETL Config",
"awsRegion": "us-west-2",
"auth": {
"roleArn": "arn:aws:iam::xxxx:role/snowplow-prod-snowflake-loader",
"sessionDuration": 900
},
"manifest": "snowplow-prod-snowflake-manifest",
"snowflakeRegion": "us-west-2",
"jdbcHost": "xxxx.snowflakecomputing.com",
"database": "xxxx",
"input": "s3://snowplow-prod-good/YYYY-MM-DD-HH/",
"stage": "snowplow_stage",
"badOutputUrl": "s3://snowplow-prod-transformed/badrow",
"stageUrl": "s3://snowplow-prod-transformed/stage",
"warehouse": "snowplow_etl_wh",
"schema": "atomic",
"account": "xxxxx",
"username": "snowplow_loader",
"password": {
"ec2ParameterStore": {
"parameterName": "snowplow.prod.snowflake.password"
}
},
"maxError": 1,
"purpose": "ENRICHED_EVENTS"
}
}