Hey, I’m quite new to Snowplow. I’ve already deployed the open source pipeline up to the S3 enriched sink, and now I’m trying to run the dataflow-runner
to shred the events, but it seems my resolver.json
is not set up correctly.
resolver.json
{
"schema": "iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-3",
"data": {
"cacheSize": 500,
"repositories": [
{
"name": "Iglu Central",
"priority": 0,
"vendorPrefixes": [ "com.snowplowanalytics" ],
"connection": {
"http": {
"uri": "http://iglucentral.com"
}
}
}
]
}
}
playbook.json
{
"schema": "iglu:com.snowplowanalytics.dataflowrunner/PlaybookConfig/avro/1-0-1",
"data": {
"region": "eu-central-1",
"credentials": {
"accessKeyId": "default",
"secretAccessKey": "default"
},
"steps": [
{
"type": "CUSTOM_JAR",
"name": "S3DistCp enriched data archiving",
"actionOnFailure": "CANCEL_AND_WAIT",
"jar": "/usr/share/aws/emr/s3-dist-cp/lib/s3-dist-cp.jar",
"arguments": [
"--src", "s3://s3sink/enriched/good/",
"--dest", "s3://s3sink/archive/enriched/run={{nowWithFormat "2006-01-02-15-04-05"}}/",
"--s3Endpoint", "s3-eu-central-1.amazonaws.com",
"--srcPattern", ".*",
"--outputCodec", "gz",
"--deleteOnSuccess"
]
},
{
"type": "CUSTOM_JAR",
"name": "RDB Shredder",
"actionOnFailure": "CANCEL_AND_WAIT",
"jar": "command-runner.jar",
"arguments": [
"spark-submit",
"--class", "com.snowplowanalytics.snowplow.rdbloader.shredder.batch.Main",
"--master", "yarn",
"--deploy-mode", "cluster",
"s3://snowplow-hosted-assets-eu-central-1/4-storage/rdb-shredder/snowplow-rdb-shredder-1.0.0.jar",
"--iglu-config", "{{base64File "/path/to/dataflow-runner/shredder/resolver.json"}}",
"--config", "{{base64File "/path/to/dataflow-runner/shredder/config.hocon"}}"
]
}
],
"tags": [ ]
}
}
error
{
"schema": "iglu:com.snowplowanalytics.snowplow.badrows/loader_iglu_error/jsonschema/2-0-0",
"data": {
"processor": {
"artifact": "snowplow-rdb-loader-common",
"version": "1.0.0"
},
"failure": [
{
"schemaCriterion": "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-*-*",
"error": {
"error": "ResolutionError",
"lookupHistory": [
{
"repository": "Iglu Central",
"errors": [
{
"error": "NotFound"
}
],
"attempts": 1,
"lastAttempt": "2021-05-14T11:08:10.064Z"
},
{
"repository": "Iglu Client Embedded",
"errors": [
{
"error": "NotFound"
}
],
"attempts": 1,
"lastAttempt": "2021-05-14T11:08:10.064Z"
}
]
}
}
],
"payload": {...,
"contexts": {
"schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0",
"data": [
{
"schema": "iglu:org.w3/PerformanceTiming/jsonschema/1-0-0",
"data": {
"navigationStart": 1620986562657,
"redirectStart": 0,
"redirectEnd": 0,
"fetchStart": 1620986563055,
"domainLookupStart": 1620986563055,
"domainLookupEnd": 1620986563055,
"connectStart": 1620986563055,
"secureConnectionStart": 0,
"connectEnd": 1620986563055,
"requestStart": 1620986563056,
"responseStart": 1620986563250,
"responseEnd": 1620986563251,
"unloadEventStart": 0,
"unloadEventEnd": 0,
"domLoading": 1620986563264,
"domInteractive": 1620986563477,
"domContentLoadedEventStart": 1620986563712,
"domContentLoadedEventEnd": 1620986563712,
"domComplete": 1620986564427,
"loadEventStart": 1620986564451,
"loadEventEnd": 1620986564451
}
},
{
"schema": "iglu:com.snowplowanalytics.snowplow/web_page/jsonschema/1-0-0",
"data": {
"id": "d2040044-4329-4247-af09-cee1a9c5525e"
}
}
]
},
"unstruct_event": {
"schema": "iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0",
"data": {
"schema": "iglu:com.snowplowanalytics.snowplow/link_click/jsonschema/1-0-1",
"data": {
"targetUrl": "https://web.com",
"elementId": "",
"elementClasses": [
"sc-eCssSg",
"bXughz",
"sc-jLiVlK",
"hmPzYI",
"btn"
],
"elementTarget": ""
}
}
},
"derived_contexts": {
"schema": "iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0",
"data": [
{
"schema": "iglu:com.snowplowanalytics.snowplow/ua_parser_context/jsonschema/1-0-0",
"data": {
"useragentFamily": "Chrome",
"useragentMajor": "90",
"useragentMinor": "0",
"useragentPatch": "4430",
"useragentVersion": "Chrome 90.0.4430",
"osFamily": "Mac OS X",
"osMajor": "10",
"osMinor": "15",
"osPatch": "7",
"osPatchMinor": null,
"osVersion": "Mac OS X 10.15.7",
"deviceFamily": "Mac"
}
}
]
},
"domain_sessionid": "28940c40-bb53-46fd-80d0-d587bd673cc7",
"derived_tstamp": "2021-05-14T11:02:45.012Z",
"event_vendor": "com.snowplowanalytics.snowplow",
"event_name": "link_click",
"event_format": "jsonschema",
"event_version": "1-0-1",
"event_fingerprint": null,
"true_tstamp": null
}
What am I doing wrong? I tried changing the json-schema
version from 1.0.0 to 1.0.3, but that didn’t make any difference.