I’ve followed the instructions on the schema registry page and on the configuring shredding page, but whenever I run an EMR process the data is not loaded into the custom table I created. Here are the pertinent files I created (some of the names and words have been changed). I assume there’s just some minor detail I’m missing. Any help would be greatly appreciated.
\custom_event_1.json - located at S3 bucket rr-snowplow-cloudfront-iglu-central/jsonpaths/com.rigdigbi
{
  "jsonpaths": [
    "$.schema.vendor",
    "$.schema.name",
    "$.schema.format",
    "$.schema.version",

    "$.hierarchy.rootId",
    "$.hierarchy.rootTstamp",
    "$.hierarchy.refRoot",
    "$.hierarchy.refTree",
    "$.hierarchy.refParent",

    "$.data.app_id",
    "$.data.g_number",
    "$.data.org_id",
    "$.data.user_id",
    "$.data.user_name"
  ]
}
\custom_event.json - located at rr-snowplow-cloudfront-iglu-central/schemas/com.rigdigbi/custom_event/jsonschema/1-0-0
{
  "$schema": "http://iglucentral.com/schemas/com.snowplowanalytics.self-desc/schema/jsonschema/1-0-0#",
  "description": "Schema for custom organization event",
  "self": {
    "vendor": "com.rigdigbi",
    "name": "custom_event",
    "format": "jsonschema",
    "version": "1-0-0"
  },
  "type": "object",
  "properties": {
    "app_id": {
      "type": "string",
      "maxLength": 255
    },
    "g_number": {
      "type": "string",
      "maxLength": 255
    },
    "org_id": {
      "type": "integer"
    },
    "user_id": {
      "type": "string",
      "maxLength": 255
    },
    "user_name": {
      "type": "string",
      "maxLength": 255
    }
  },
  "required": ["app_id", "g_number", "org_id", "user_id", "user_name"],
  "additionalProperties": false
}
\etl.resolver.json - located in Linux environment
{
  "schema": "iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-0",
  "data": {
    "cacheSize": 500,
    "repositories": [
      {
        "name": "Iglu Central",
        "priority": 0,
        "vendorPrefixes": [
          "com.snowplowanalytics"
        ],
        "connection": {
          "http": {
            "uri": "http://iglucentral.com"
          }
        }
      },
      {
        "name": "Custom Event",
        "priority": 0,
        "vendorPrefixes": [
          "com.rigdigbi"
        ],
        "connection": {
          "http": {
            "uri": "http://d2y4sajn4og0gq.cloudfront.net"
          }
        }
      }
    ]
  }
}
\etl.cf.yml - located in Linux environment
# This config file is for the CloudFront collector.
# Don't use it for the Clojure collector.
# NOTE(review): nesting below was reconstructed from a paste that had lost
# its indentation; verify it against the EmrEtlRunner sample config.
aws:
  # Quoted so an all-digit credential is loaded as a string, not an integer.
  access_key_id: "11111"
  secret_access_key: aaaaa
  s3:
    region: us-east-1
    buckets:
      assets: s3://sp-hosted-assets
      log: s3://sp-log/emr
      raw:
        in:
          - s3://sp-cloudfront-dev-logs/
        processing: s3://sp-cloudfront-processing
        archive: s3://sp-cloudfront-archive/raw
      enriched:
        good: s3://sp-cloudfront-enriched/good
        bad: s3://sp-cloudfront-enriched/bad
        errors: s3://sp-cloudfront-enriched/errors
        archive: s3://sp-cloudfront-archive/enriched/good
      shredded:
        good: s3://sp-cloudfront-shredded/good
        bad: s3://sp-cloudfront-shredded/bad
        errors: s3://sp-cloudfront-shredded/errors
        archive: s3://sp-cloudfront-archive/shredded/good
      # NOTE(review): assumed to be a direct child of `buckets` (sibling of
      # `assets`), not of `shredded` - confirm. It should point at the folder
      # that holds <vendor>/<event_name>_<model>.json JSONPaths files.
      jsonpath_assets: s3://sp-cloudfront-iglu-central/jsonpaths
  emr:
    ami_version: 3.6.0
    region: us-east-1
    placement: us-east-1c
    ec2_subnet_id:
    jobflow_role: EMR_EC2_DefaultRole
    service_role: EMR_DefaultRole
    ec2_key_name: SisenseKeyPair
    software:
      hbase:  # not used for ami_version 3.6.0
      lingual:  # not used for ami_version 3.6.0
    jobflow:
      master_instance_type: m1.medium
      core_instance_count: 2
      core_instance_type: c3.xlarge  # m1.large
      task_instance_count: 0
      task_instance_type: m1.medium
      task_instance_bid: 0.015
    # NOTE(review): assumed to belong to `emr`, not `jobflow` - confirm.
    bootstrap_failure_tries: 3
collectors:
  format: cloudfront
enrich:
  job_name: Snowplow Cloudfront ETL
  versions:
    hadoop_enrich: 1.0.0
    hadoop_shred: 0.4.0
  continue_on_unexpected_error: false
  output_compression: NONE
storage:
  download:
    folder:
  targets:
    - name: RR Snowplow Events
      type: redshift
      host: snowplow.aaa111.us-east-1.redshift.amazonaws.com
      database: events
      port: 1234
      table: atomic.events
      username: user
      # Quoted so an all-digit password is loaded as a string, not an integer.
      password: "1234567"
      maxerror: 10
      comprows: 200000
monitoring:
  tags: {}
  logging:
    level: INFO
  # NOTE(review): left empty as in the original; any child keys are not
  # visible here.
  snowplow:
\index.html - Only Unstruct Event tracking code is shown
// Track the custom organization event as a self-describing (unstructured) event.
// The schema key must be an Iglu URI of the form
// iglu:<vendor>/<name>/<format>/<version>. It must not contain the bucket name
// or the "schemas/" path prefix; the resolver adds the repository location.
snowplow_name_here('trackUnstructEvent', {
    schema: 'iglu:com.rigdigbi/custom_event/jsonschema/1-0-0',
    data: {
        app_id: 'etltesting',
        g_number: 'abc123',
        org_id: 1,
        user_id: 'zzz999',
        user_name: 'user'
    }
});