Hi fellow Snowplowers,
I just downloaded the latest version of Snowplow, but I'm getting a "Contract violation for return value" error. I've compared the actual and expected values and can't spot the problem.
I'm running: `snowplow-emr-etl-runner-r109 run --config emr.yml`
Below is the error and my config file.
Thanks
Joao
```
ERROR: org.jruby.embed.EvalFailedException: (ReturnContractError) Contract violation for return value:
Expected: #{:access_key_id=>String, :secret_access_key=>String, :s3=>{:region=>String, :buckets=>{:assets=>String, :jsonpath_assets=>#, :log=>String, :encrypted=>Contracts::Bool, :raw=>##, :processing=>String, :archive=>String}, nil]>, :enriched=>{:good=>String, :bad=>#, :errors=>#, :archive=>#, :stream=>#}, :shredded=>{:good=>String, :bad=>String, :errors=>#, :archive=>#}}},
:emr=>{:ami_version=>String, :region=>String, :jobflow_role=>String, :service_role=>String, :placement=>#, :ec2_subnet_id=>#, :ec2_key_name=>String, :security_configuration=>#, :bootstrap=>#<Contracts::Maybe:0x24bdd996 @vals=[#, nil]>, :software=>{:hbase=>#, :lingual=>#}, :jobflow=>{:job_name=>String, :master_instance_type=>String, :core_instance_count=>Contracts::Num, :core_instance_type=>String, :core_instance_ebs=>##, :volume_type=>#, :volume_iops=>#<Contracts::Maybe:0x60510791 @vals=[#, nil]>, :ebs_optimized=>#}, nil]>, :task_instance_count=>Contracts::Num, :task_instance_type=>String, :task_instance_bid=>#}, :additional_info=>#, :bootstrap_failure_tries=>Contracts::Num, :configuration=>#<Contracts::Maybe:0x165a5979 @vals=[#<Contracts::HashOf:0x4598961d @key=Symbol, @value=#>, nil]>}},
:collectors=>#String}, nil]>,
:enrich=>{:versions=>#String}, nil]>, :continue_on_unexpected_error=>#, :output_compression=>#},
:storage=>{:versions=>{:rdb_shredder=>String, :hadoop_elasticsearch=>String, :rdb_loader=>String}},
:monitoring=>{:tags=>#, :logging=>{:level=>String}, :snowplow=>#String, :collector=>String, :app_id=>String}, nil]>}}, nil]>,
Actual: {:aws=>{:access_key_id=>"xxxxxxxxxxxxxxxxxxxx", :secret_access_key=>"xxxxxxxxxxxxxxxxx", :s3=>{:region=>"us-west-2", :buckets=>{:assets=>"s3://snowplow-hosted-assets", :jsonpath_assets=>"s3://sp-xxxxxxxxx-jsonpaths", :log=>"s3://sp-xxxxxxxxx-log", :encrypted=>false, :raw=>{:in=>["s3://elasticbeanstalk-us-west-2-xxxxxxxxxx/resources/environments/logs/publish/e-fjft32pwx3"], :processing=>"s3://sp-xxxxxxxxx-processing", :archive=>"s3://sp-xxxxxxxxx-archive"}, :enriched=>{:good=>"s3://sp-xxxxxxxxx-output/enriched/good", :bad=>"s3://sp-xxxxxxxxx-output/enriched/bad", :errors=>nil, :archive=>"s3://sp-xxxxxxxxx-output/enriched/archive", :stream=>nil}, :shredded=>{:good=>"s3://sp-xxxxxxxxx-output/shredded/good", :bad=>"s3://sp-xxxxxxxxx-output/shredded/bad", :errors=>nil, :archive=>"s3://sp-xxxxxxxxx-output/shredded/archive"}}},
:emr=>{:ami_version=>"5.9.0", :region=>"us-west-2", :jobflow_role=>"EMR_EC2_DefaultRole", :service_role=>"EMR_DefaultRole", :placement=>nil, :ec2_subnet_id=>nil, :ec2_key_name=>"xxxxxxxxx", :security_configuration=>nil, :bootstrap=>[], :software=>{:hbase=>nil, :lingual=>nil}, :jobflow=>{:job_name=>"Snowplow ETL", :master_instance_type=>"m1.medium", :core_instance_count=>1, :core_instance_type=>"m1.medium", :core_instance_ebs=>{:volume_size=>50, :volume_type=>"gp2", :volume_iops=>400, :ebs_optimized=>false}, :task_instance_count=>0, :task_instance_type=>"m1.medium", :task_instance_bid=>0.015}, :bootstrap_failure_tries=>3, :configuration=>{:"yarn-site"=>{:"yarn.resourcemanager.am.max-attempts"=>1}, :spark=>{:maximizeResourceAllocation=>true}}, :additional_info=>nil}},
:collectors=>{:format=>"clj-tomcat"},
:enrich=>{:versions=>{:spark_enrich=>"1.16.0"}, :continue_on_unexpected_error=>false, :output_compression=>"GZIP"},
:storage=>{:versions=>{:rdb_loader=>"0.14.0", :rdb_shredder=>"0.13.1", :hadoop_elasticsearch=>"0.1.0"}},
:monitoring=>{:tags=>{}, :logging=>{:level=>"DEBUG"}, :snowplow=>{:method=>"get", :protocol=>"http", :port=>80, :app_id=>nil, :collector=>nil}}}
Value guarded in: Snowplow::EmrEtlRunner::Cli::load_config
With Contract: Maybe, String, Bool => Maybe
At: uri:classloader:/emr-etl-runner/lib/snowplow-emr-etl-runner/cli.rb:205
```
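One thing I did notice but don't know how to interpret: in the actual value above, the two settings under :configuration are parsed as an Integer (1) and a Boolean (true), while the expected contract for that section shows a HashOf with Symbol keys. If the contract wants string values there, I guess they would need to be quoted in the YAML, something like this (just a sketch of the quoting, I haven't confirmed that this is what the contract expects):

```yaml
    configuration:
      yarn-site:
        yarn.resourcemanager.am.max-attempts: "1" # quoted so YAML loads it as a String rather than an Integer
      spark:
        maximizeResourceAllocation: "true" # quoted so YAML loads it as a String rather than a Boolean
```

Is that likely to be the issue, or is the problem somewhere else in my config?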
This is the config file:
```yaml
aws:
  # Credentials can be hardcoded or set in environment variables
  access_key_id: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
  secret_access_key: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
  s3:
    region: us-west-2
    buckets:
      assets: s3://snowplow-hosted-assets # DO NOT CHANGE unless you are hosting the jarfiles etc yourself in your own bucket
      jsonpath_assets: s3://sp-xxxxxxxxx-jsonpaths # If you have defined your own JSON Schemas, add the s3:// path to your own JSON Path files in your own bucket here
      log: s3://sp-xxxxxxxxx-log
      encrypted: false # Whether the buckets below are encrypted using server side encryption (SSE-S3)
      raw:
        in: # This is a YAML array of one or more in buckets - you MUST use hyphens before each entry in the array, as below
          - s3://elasticbeanstalk-us-west-2-xxxxxxxxx/resources/environments/logs/publish/e-xxxxxx # e.g. s3://my-old-collector-bucket
        processing: s3://sp-xxxxxxxxx-processing
        archive: s3://sp-xxxxxxxxx-archive
      enriched:
        good: s3://sp-xxxxxxxxx-output/enriched/good # e.g. s3://my-out-bucket/enriched/good
        bad: s3://sp-xxxxxxxxx-output/enriched/bad # e.g. s3://my-out-bucket/enriched/bad
        errors: # Leave blank unless :continue_on_unexpected_error: set to true below
        archive: s3://sp-xxxxxxxxx-output/enriched/archive # Where to archive enriched events to, e.g. s3://my-archive-bucket/enriched
        stream:
      shredded:
        good: s3://sp-xxxxxxxxx-output/shredded/good # e.g. s3://my-out-bucket/shredded/good
        bad: s3://sp-xxxxxxxxx-output/shredded/bad # e.g. s3://my-out-bucket/shredded/bad
        errors: # Leave blank unless :continue_on_unexpected_error: set to true below
        archive: s3://sp-xxxxxxxxx-output/shredded/archive # Where to archive shredded events to, e.g. s3://my-archive-bucket/shredded
  emr:
    ami_version: 5.9.0
    region: us-west-2 # Always set this
    jobflow_role: EMR_EC2_DefaultRole # Created using $ aws emr create-default-roles
    service_role: EMR_DefaultRole # Created using $ aws emr create-default-roles
    placement: # Set this if not running in VPC. Leave blank otherwise
    ec2_subnet_id: # Set this if running in VPC. Leave blank otherwise
    ec2_key_name: xxxxxxxxx
    security_configuration: # Specify your EMR security configuration if needed. Leave blank otherwise
    bootstrap: [] # Set this to specify custom bootstrap actions. Leave empty otherwise
    software:
      hbase: # Optional. To launch on cluster, provide version, "0.92.0", keep quotes. Leave empty otherwise.
      lingual: # Optional. To launch on cluster, provide version, "1.1", keep quotes. Leave empty otherwise.
    # Adjust your Hadoop cluster below
    jobflow:
      job_name: Snowplow ETL # Give your job a name
      master_instance_type: m1.medium
      core_instance_count: 1
      core_instance_type: m1.medium
      core_instance_ebs: # Optional. Attach an EBS volume to each core instance.
        volume_size: 50 # Gigabytes
        volume_type: gp2
        volume_iops: 400 # Optional. Will only be used if volume_type is io1
        ebs_optimized: false # Optional. Will default to true
      task_instance_count: 0 # Increase to use spot instances
      task_instance_type: m1.medium
      task_instance_bid: 0.015 # In USD. Adjust bid, or leave blank for non-spot-priced (i.e. on-demand) task instances
    bootstrap_failure_tries: 3 # Number of times to attempt the job in the event of bootstrap failures
    configuration:
      yarn-site:
        yarn.resourcemanager.am.max-attempts: 1
      spark:
        maximizeResourceAllocation: true
    additional_info: # Optional JSON string for selecting additional features
collectors:
  format: clj-tomcat # For example: 'clj-tomcat' for the Clojure Collector, 'thrift' for Thrift records, 'tsv/com.amazon.aws.cloudfront/wd_access_log' for Cloudfront access logs or 'ndjson/urbanairship.connect/v1' for UrbanAirship Connect events
enrich:
  versions:
    spark_enrich: 1.16.0 # Version of the Spark Enrichment process
  continue_on_unexpected_error: false # Set to 'true' (and set :out_errors: above) if you don't want any exceptions thrown from ETL
  output_compression: GZIP # Compression only supported with Redshift, set to NONE if you have Postgres targets. Allowed formats: NONE, GZIP
storage:
  versions:
    rdb_loader: 0.14.0
    rdb_shredder: 0.13.1 # Version of the Spark Shredding process
    hadoop_elasticsearch: 0.1.0 # Version of the Hadoop to Elasticsearch copying process
monitoring:
  tags: {} # Name-value pairs describing this job
  logging:
    level: DEBUG # You can optionally switch to INFO for production
  snowplow:
    method: get
    protocol: http
    port: 80
    app_id: # e.g. snowplow
    collector: # e.g. d3rkrsqld9gmqf.cloudfront.net
```