Thanks @Mike. I’m trying to add the modified JSON schema to my registry.
As I already had "iglu:com.segment/page/jsonschema/1-0-0" in my registry and couldn’t delete it ({"message":"DELETE is forbidden on production registry"}) or replace it ({"message":"Schema already exists"}), I created a new one under a different version: "iglu:com.segment/page/jsonschema/2-0-0". If I do a GET on my registry, I see both.
Now I’m sending events against both schemas, but in the logs of the shredder and the loader I only see events for 1-0-0; there are none for 2-0-0.
Any idea what could cause this?
The URLs:
- [COLLECTOR HOST]/com.snowplowanalytics.iglu/v1?schema=iglu%3Acom.segment%2Fpage%2Fjsonschema%2F1-0-0&aid=[APP_ID]
- [COLLECTOR HOST]/com.snowplowanalytics.iglu/v1?schema=iglu%3Acom.segment%2Fpage%2Fjsonschema%2F2-0-0&aid=[APP_ID]
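These are fired as plain GETs. Here is a minimal sketch of the test calls (Python, just for illustration; the collector host and app id values below are placeholders, not my real ones):

# Minimal sketch of the test calls against the collector's Iglu webhook endpoint.
# COLLECTOR_HOST and APP_ID stand in for [COLLECTOR HOST] and [APP_ID].
from urllib.parse import urlencode
from urllib.request import urlopen

COLLECTOR_HOST = "https://collector.example.com"  # placeholder
APP_ID = "my-app"                                 # placeholder

for version in ("1-0-0", "2-0-0"):
    schema = f"iglu:com.segment/page/jsonschema/{version}"
    query = urlencode({"schema": schema, "aid": APP_ID})
    url = f"{COLLECTOR_HOST}/com.snowplowanalytics.iglu/v1?{query}"
    with urlopen(url) as resp:
        print(version, resp.status)

Both requests go through exactly the same code path; only the schema version differs.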
This is my Iglu resolver:
{
  "schema": "iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-1",
  "data": {
    "cacheSize": 500,
    "repositories": [
      {
        "name": "my iglu server",
        "priority": 0,
        "vendorPrefixes": ["com.snowplowanalytics"],
        "connection": {
          "http": {
            "uri": "[IGLU SERVER HOST]/api",
            "apikey": "[API KEY]"
          }
        }
      }
    ]
  }
}
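To rule out a resolution issue on the Iglu side, this is roughly how I check that both versions can be fetched from the server the resolver points at. It is only a sketch: the /api/schemas/&lt;vendor&gt;/&lt;name&gt;/&lt;format&gt;/&lt;version&gt; lookup path and the apikey header are assumptions about the Iglu Server API, and the host/key values are placeholders.

# Rough check that both schema versions are retrievable from the Iglu Server.
# IGLU_HOST and API_KEY stand in for [IGLU SERVER HOST] and [API KEY];
# the /api/schemas/... path is an assumption about the Iglu Server API.
from urllib.request import Request, urlopen

IGLU_HOST = "https://iglu.example.com"            # placeholder
API_KEY = "00000000-0000-0000-0000-000000000000"  # placeholder

for version in ("1-0-0", "2-0-0"):
    url = f"{IGLU_HOST}/api/schemas/com.segment/page/jsonschema/{version}"
    req = Request(url, headers={"apikey": API_KEY})
    with urlopen(req) as resp:
        print(version, resp.status)

This matches what I see when I GET the registry: both versions are there.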
And this is my config:
{
  # Human-readable identifier, can be random
  "name": "Acme Redshift",
  # Machine-readable unique identifier, must be a UUID
  "id": "123e4567-e89b-12d3-a456-426655440000",
  # Data Lake (S3) region
  "region": "us-west-2",
  # SQS queue name used by Shredder and Loader to communicate
  "messageQueue": ${SQS_TOPIC},
  # Shredder-specific configs
  "shredder": {
    # "batch" for Spark job and "stream" for fs2 streaming app
    "type": "stream",
    # For batch: path to enriched archive (must be populated separately with run=YYYY-MM-DD-hh-mm-ss directories) for S3 input
    # "input": "s3://bucket/input/",
    # For stream: appName, streamName, region triple for kinesis
    "input": {
      # kinesis and file are the only options for stream shredder
      "type": "kinesis",
      # KCL app name - a DynamoDB table will be created with the same name
      "appName": ${DYNAMO_TABLE},
      # Kinesis stream name
      "streamName": ${KINESIS_STREAM_ENRICHER_GOOD},
      # Kinesis region
      "region": "us-west-2",
      # Kinesis position: LATEST or TRIM_HORIZON
      "position": "TRIM_HORIZON"
    },
    # For stream shredder: frequency to emit loading finished message - 5,10,15,20,30,60 etc minutes
    "windowing": "5 minutes",
    # Path to shredded archive
    "output": {
      # Path to shredded output
      "path": ${S3_BUCKET_SCHREDDED},
      # Shredder output compression, GZIP or NONE
      "compression": "GZIP"
    }
  },
  # Schema-specific format settings (recommended to leave all three groups empty and use TSV as default)
  "formats": {
    # Format used by default (TSV or JSON)
    "default": "TSV",
    # Schemas to be shredded as JSONs, corresponding JSONPath files must be present. Automigrations will be disabled
    "json": [
      "iglu:com.acme/json-event/jsonschema/1-0-0",
      "iglu:com.acme/json-event/jsonschema/2-*-*"
    ],
    # Schemas to be shredded as TSVs, presence of the schema on the Iglu Server is necessary. Automigrations enabled
    "tsv": [ ],
    # Schemas that won't be loaded
    "skip": [
      "iglu:com.acme/skip-event/jsonschema/1-*-*"
    ]
  },
  # Optional. S3 path that holds JSONPaths
  # "jsonpaths": "s3://bucket/jsonpaths/",
  # Warehouse connection details
  "storage": {
    # Database, redshift is the only acceptable option
    "type": "redshift",
    # Redshift hostname
    "host": ${REDSHIFT_HOST},
    # Database name
    "database": ${REDSHIFT_DATABASE},
    # Database port
    "port": 5439,
    # AWS Role ARN allowing Redshift to load data from S3
    "roleArn": ${REDSHIFT_IAM_ROLE_ARN},
    # DB schema name
    "schema": ${REDSHIFT_SCHEMA},
    # DB user with permissions to load data
    "username": ${REDSHIFT_USER},
    # DB password
    "password": ${REDSHIFT_PASSWORD},
    # Custom JDBC configuration
    "jdbc": {"ssl": true},
    # MAXERROR, amount of acceptable loading errors
    "maxError": 10
  },
  # Additional steps. analyze, vacuum and transit_load are valid values
  "steps": ["analyze"],
  # Observability and reporting options
  "monitoring": {
    # # Snowplow tracking (optional)
    # "snowplow": {
    #   "appId": "redshift-loader",
    #   "collector": "snplow.acme.com",
    # }
    # # Optional, for tracking runtime exceptions
    # "sentry": {
    #   "dsn": "http://sentry.acme.com"
    # },
    # # Optional, configure how metrics are reported
    # "metrics": {
    #   # Optional, send metrics to StatsD server
    #   "statsd": {
    #     "hostname": "localhost",
    #     "port": 8125,
    #     # Any key-value pairs to be tagged on every StatsD metric
    #     "tags": {
    #       "app": "rdb-loader"
    #     }
    #     # Optional, override the default metric prefix
    #     # "prefix": "snowplow.rdbloader."
    #   },
    #   # Optional, print metrics on stdout (with slf4j)
    #   "stdout": {
    #     # Optional, override the default metric prefix
    #     # "prefix": "snowplow.rdbloader."
    #   }
    # }
  }
}
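Since "formats.default" is TSV, I’d expect the loader to automigrate a new table for the 2-0-0 model. Purely as a sketch (psycopg2, placeholder connection values, and the com_segment_page_&lt;model&gt; table-name pattern is my assumption about the naming convention), this is the kind of check I can run on the Redshift side:

# Rough check (sketch only) for shredded-event tables in Redshift.
# Connection details mirror the storage block above; all values are placeholders.
import psycopg2  # assumes psycopg2 is available

conn = psycopg2.connect(
    host="redshift.example.com",  # placeholder for ${REDSHIFT_HOST}
    port=5439,
    dbname="snowplow",            # placeholder for ${REDSHIFT_DATABASE}
    user="loader",                # placeholder for ${REDSHIFT_USER}
    password="change-me",         # placeholder for ${REDSHIFT_PASSWORD}
)
with conn.cursor() as cur:
    cur.execute(
        """
        select table_name
        from information_schema.tables
        where table_schema = %s and table_name like 'com_segment_page_%%'
        """,
        ("atomic",),  # placeholder for ${REDSHIFT_SCHEMA}
    )
    print(cur.fetchall())
conn.close()

If only com_segment_page_1 shows up there, that would line up with what I’m seeing in the loader logs.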