Hi Guys,
For three days now I have been unable to start my emr-etl-runner setup.
I have gone through the configuration many times, and every time I get the same error (I've removed the key and secret from the log for obvious reasons):
F, [2016-04-07T16:56:52.022000 #8346] FATAL -- :
ContractError (Contract violation for return value: Expected: {:aws=>{:access_key_id=>String, :secret_access_key=>String, :s3=>{:region=>String, :buckets=>{:assets=>String, :jsonpath_assets=>#<Contracts::Maybe:0x5918a58d @vals=[String, nil]>, :log=>String, :raw=>{:in=>#<Contracts::ArrayOf:0x611cf9ad @contract=String>, :processing=>String, :archive=>String}, :enriched=>{:good=>String, :bad=>String, :errors=>#<Contracts::Maybe:0x354bfeaa @vals=[String, nil]>, :archive=>#<Contracts::Maybe:0x103c663c @vals=[String, nil]>}, :shredded=>{:good=>String, :bad=>String, :errors=>#<Contracts::Maybe:0x6d5bf577 @vals=[String, nil]>, :archive=>#<Contracts::Maybe:0x30508fb8 @vals=[String, nil]>}}}, :emr=>{:ami_version=>String, :region=>String, :jobflow_role=>String, :service_role=>String, :placement=>#<Contracts::Maybe:0x790b91cf @vals=[String, nil]>, :ec2_subnet_id=>#<Contracts::Maybe:0x6691eb1e @vals=[String, nil]>, :ec2_key_name=>String, :bootstrap=>#<Contracts::Maybe:0x75e3332d @vals=[#<Contracts::ArrayOf:0x4d627251 @contract=String>, nil]>, :software=>{:hbase=>#<Contracts::Maybe:0x17350bd2 @vals=[String, nil]>, :lingual=>#<Contracts::Maybe:0x1b9b13e5 @vals=[String, nil]>}, :jobflow=>{:master_instance_type=>String, :core_instance_count=>Contracts::Num, :core_instance_type=>String, :task_instance_count=>Contracts::Num, :task_instance_type=>String, :task_instance_bid=>#<Contracts::Maybe:0x62372d8b @vals=[Contracts::Num, nil]>}, :additional_info=>#<Contracts::Maybe:0x148f330d @vals=[String, nil]>, :bootstrap_failure_tries=>Contracts::Num}}, :collectors=>{:format=>String}, :enrich=>{:job_name=>String, :versions=>{:hadoop_enrich=>String, :hadoop_shred=>String}, :continue_on_unexpected_error=>Contracts::Bool, :output_compression=>#<Proc:0x7b9e5560@/root/snowplow-emr-etl-runner!/emr-etl-runner/lib/snowplow-emr-etl-runner/contracts.rb:23 (lambda)>}, :storage=>{:download=>{:folder=>#<Contracts::Maybe:0x4bf2507f @vals=[String, nil]>}, :targets=>#<Contracts::ArrayOf:0x1182e8cd @contract={:name=>String, :type=>String, :host=>String, :database=>String, :port=>Contracts::Num, :ssl_mode=>#<Contracts::Maybe:0xeab1bf2 @vals=[String, nil]>, :table=>String, :username=>#<Contracts::Maybe:0x11a41324 @vals=[String, nil]>, :password=>#<Contracts::Maybe:0xa3635b8 @vals=[String, nil]>, :es_nodes_wan_only=>#<Contracts::Maybe:0x4e5c2044 @vals=[Contracts::Bool, nil]>, :maxerror=>#<Contracts::Maybe:0x67307227 @vals=[Contracts::Num, nil]>, :comprows=>#<Contracts::Maybe:0x8a50f7f @vals=[Contracts::Num, nil]>}>}, :monitoring=>{:tags=>#<Contracts::HashOf:0x2981f3b6 @value=String, @key=Symbol>, :logging=>{:level=>String}, :snowplow=>#<Contracts::Maybe:0x12a74704 @vals=[{:method=>String, :collector=>String, :app_id=>String}, nil]>}}, Actual: {:aws=>{:access_key_id=>"", :secret_access_key=>"", :s3=>{:region=>"eu-west-1", :buckets=>{:assets=>"s3://snowplow-hosted-assets", :jsonpath_assets=>nil, :log=>"s3://ud-snowplow-etl/logs/", :raw=>{:in=>["s3://ud-snowplow-etl-logfiles/"], :processing=>"s3://ud-snowplow-etl/processing/", :archive=>"s3://ud-snowplow-etl-archive/raw"}, :enriched=>{:good=>"s3://ud-snowplow-etl/enriched/good", :bad=>"s3://ud-snowplow-etl/enriched/bad", :errors=>nil, :archive=>"s3://ud-snowplow-etl/enriched/archive"}, :shredded=>{:good=>"s3://ud-snowplow-etl/shredded/good", :bad=>"s3://ud-snowplow-etl/shredded/bad", :errors=>nil, :archive=>"s3://ud-snowplow-etl/shredded/archive"}}}, :emr=>{:ami_version=>"4.3.0", :region=>"eu-west-1", :jobflow_role=>"EMR_EC2_DefaultRole", :service_role=>"EMR_DefaultRole", :placement=>nil, 
:ec2_subnet_id=>"subnet-7783921c", :ec2_key_name=>"ud-com-aws-test-key", :bootstrap=>[], :software=>{:hbase=>"0.92.0", :lingual=>"1.1"}, :jobflow=>{:master_instance_type=>"m1.medium", :core_instance_count=>2, :core_instance_type=>"m1.medium", :task_instance_count=>0, :task_instance_type=>"m1.medium", :task_instance_bid=>0.015}, :bootstrap_failure_tries=>3, :additional_info=>nil}}, :collectors=>{:format=>"cloudfront"}, :enrich=>{:job_name=>"Snowplow ETL", :versions=>{:hadoop_enrich=>"1.6.0", :hadoop_shred=>"0.8.0", :hadoop_elasticsearch=>"0.1.0"}, :continue_on_unexpected_error=>false, :output_compression=>"NONE"}, :monitoring=>{:tags=>{}, :logging=>{:level=>"DEBUG"}, :snowplow=>{:method=>"get", :app_id=>"snowplow", :collector=>"d2bpvzh93js6np.cloudfront.net"}}} Value guarded in: Snowplow::EmrEtlRunner::Cli::load_config With Contract: Maybe, String => Hash At: /root/snowplow-emr-etl-runner!/emr-etl-runner/lib/snowplow-emr-etl-runner/cli.rb:134 ): /root/snowplow-emr-etl-runner!/gems/contracts-0.7/lib/contracts.rb:69:in `Contract' org/jruby/RubyProc.java:271:in `call' /root/snowplow-emr-etl-runner!/gems/contracts-0.7/lib/contracts.rb:147:in `failure_callback' /root/snowplow-emr-etl-runner!/gems/contracts-0.7/lib/contracts/decorators.rb:164:in `common_method_added' /root/snowplow-emr-etl-runner!/gems/contracts-0.7/lib/contracts/decorators.rb:159:in `common_method_added' file:/root/snowplow-emr-etl-runner!/emr-etl-runner/bin/snowplow-emr-etl-runner:37:in `(root)' org/jruby/RubyKernel.java:1091:in `load' file:/root/snowplow-emr-etl-runner!/META-INF/main.rb:1:in `(root)' org/jruby/RubyKernel.java:1072:in `require' file:/root/snowplow-emr-etl-runner!/META-INF/main.rb:1:in `(root)' /tmp/jruby5116652718292552574extract/jruby-stdlib-1.7.20.1.jar!/META-INF/jruby.home/lib/ruby/shared/rubygems/core_ext/kernel_require.rb:1:in `(root)'
Below you can find my config.yml:
aws:
  access_key_id: <%= ENV['AWS_SNOWPLOW_ACCESS_KEY'] %>
  secret_access_key: <%= ENV['AWS_SNOWPLOW_SECRET_KEY'] %>
  s3:
    region: eu-west-1
    buckets:
      assets: s3://snowplow-hosted-assets # DO NOT CHANGE unless you are hosting the jarfiles etc yourself in your own bucket
      jsonpath_assets: # If you have defined your own JSON Schemas, add the s3:// path to your own JSON Path files in your own bucket here
      log: s3://ud-snowplow-etl/logs/
      raw:
        in: # Multiple in buckets are permitted
          - s3://ud-snowplow-etl-logfiles/ # e.g. s3://my-in-bucket
        processing: s3://ud-snowplow-etl/processing/
        archive: s3://ud-snowplow-etl-archive/raw # e.g. s3://my-archive-bucket/raw
      enriched:
        good: s3://ud-snowplow-etl/enriched/good # e.g. s3://my-out-bucket/enriched/good
        bad: s3://ud-snowplow-etl/enriched/bad # e.g. s3://my-out-bucket/enriched/bad
        errors: # Leave blank unless :continue_on_unexpected_error: set to true below
        archive: s3://ud-snowplow-etl/enriched/archive # Where to archive enriched events to, e.g. s3://my-archive-bucket/enriched
      shredded:
        good: s3://ud-snowplow-etl/shredded/good # e.g. s3://my-out-bucket/shredded/good
        bad: s3://ud-snowplow-etl/shredded/bad # e.g. s3://my-out-bucket/shredded/bad
        errors: # Leave blank unless :continue_on_unexpected_error: set to true below
        archive: s3://ud-snowplow-etl/shredded/archive # Where to archive shredded events to, e.g. s3://my-archive-bucket/shredded
  emr:
    ami_version: 4.3.0
    region: eu-west-1 # Always set this
    jobflow_role: EMR_EC2_DefaultRole # Created using $ aws emr create-default-roles
    service_role: EMR_DefaultRole # Created using $ aws emr create-default-roles
    placement: # Set this if not running in VPC. Leave blank otherwise
    ec2_subnet_id: subnet-7783921c # Set this if running in VPC. Leave blank otherwise
    ec2_key_name: ud-com-aws-test-key
    bootstrap: [] # Set this to specify custom bootstrap actions. Leave empty otherwise
    software:
      hbase: "0.92.0" # Optional. To launch on cluster, provide version, "0.92.0", keep quotes. Leave empty otherwise.
      lingual: "1.1" # Optional. To launch on cluster, provide version, "1.1", keep quotes. Leave empty otherwise.
    # Adjust your Hadoop cluster below
    jobflow:
      master_instance_type: m1.medium
      core_instance_count: 2
      core_instance_type: m1.medium
      task_instance_count: 0 # Increase to use spot instances
      task_instance_type: m1.medium
      task_instance_bid: 0.015 # In USD. Adjust bid, or leave blank for non-spot-priced (i.e. on-demand) task instances
    bootstrap_failure_tries: 3 # Number of times to attempt the job in the event of bootstrap failures
    additional_info: # Optional JSON string for selecting additional features
collectors:
  format: cloudfront # For example: 'clj-tomcat' for the Clojure Collector, 'thrift' for Thrift records, 'tsv/com.amazon.aws.cloudfront/wd_access_log' for Cloudfront access logs or 'ndjson/urbanairship.connect/v1' for UrbanAirship Connect events
enrich:
  job_name: Snowplow ETL # Give your job a name
  versions:
    hadoop_enrich: 1.6.0 # Version of the Hadoop Enrichment process
    hadoop_shred: 0.8.0 # Version of the Hadoop Shredding process
    hadoop_elasticsearch: 0.1.0 # Version of the Hadoop to Elasticsearch copying process
  continue_on_unexpected_error: false # Set to 'true' (and set :out_errors: above) if you don't want any exceptions thrown from ETL
  output_compression: NONE # Compression only supported with Redshift, set to NONE if you have Postgres targets. Allowed formats: NONE, GZIP
monitoring:
  tags: {} # Name-value pairs describing this job
  logging:
    level: DEBUG # You can optionally switch to INFO for production
  snowplow:
    method: get
    app_id: snowplow # e.g. snowplow
    collector: d2bpvzh93js6np.cloudfront.net # e.g. d3rkrsqld9gmqf.cloudfront.net
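In case it helps with diagnosis, here is a small standalone sanity check (plain Ruby, not part of EmrEtlRunner; config.yml is just an assumed filename, and I am assuming the runner expands the ERB tags the same way before parsing). It shows which top-level sections actually come out of the ERB plus YAML round trip, and whether the two AWS environment variables are set:

# sanity_check.rb: a standalone check, not part of EmrEtlRunner.
# Renders the ERB tags (<%= ENV[...] %>) used in the config above and
# parses the result as YAML, then prints the top-level sections.
require 'erb'
require 'yaml'

raw    = File.read('config.yml') # assumed filename
config = YAML.load(ERB.new(raw).result)

# The contract above expects: aws, collectors, enrich, storage, monitoring
puts config.keys.inspect

# Check that the credentials to be interpolated are actually present
%w[AWS_SNOWPLOW_ACCESS_KEY AWS_SNOWPLOW_SECRET_KEY].each do |var|
  puts "#{var}: #{ENV[var].to_s.empty? ? 'NOT SET' : 'set'}"
end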