Hi @josh,
I'm sharing the entire configuration of my setup with you. Please let me know if I messed up somewhere.
Thanks
Tracker code
<script type="text/javascript">
;(function(p,l,o,w,i,n,g){if(!p[i]){p.GlobalSnowplowNamespace=p.GlobalSnowplowNamespace||[];
p.GlobalSnowplowNamespace.push(i);p[i]=function(){(p[i].q=p[i].q||[]).push(arguments)
};p[i].q=p[i].q||[];n=l.createElement(o);g=l.getElementsByTagName(o)[0];n.async=1;
n.src=w;g.parentNode.insertBefore(n,g)}}(window,document,"script","//d1fc8wv8zag5ca.cloudfront.net/2.6.2/sp.js","snowplow"));
window.snowplow('newTracker', 'cf', 'xx.xx.xx.xx', { // Initialise a tracker
  appId: 'pa-index',
  cookieDomain: '.proxyacid.com'
});
window.snowplow('trackPageView');
</script>
collector.conf
collector {
  interface = "0.0.0.0"
  port = "80"
  production = true

  p3p {
    policyref = "/w3c/p3p.xml"
    CP = "NOI DSP COR NID PSA OUR IND COM NAV STA"
  }

  cookie {
    enabled = true
    expiration = "365 days" # e.g. "365 days"
    name = cookie
    #domain = "{{collectorCookieDomain}}"
  }

  sink {
    enabled = "kinesis"

    kinesis {
      thread-pool-size: 10 # Thread pool size for Kinesis API requests
      aws {
        access-key: "key"
        secret-key: "key"
      }
      stream {
        region: "us-west-2"
        good: "good"
        bad: "bad"
      }
      backoffPolicy: {
        minBackoff: 3000
        maxBackoff: 600000
      }
    }

    kafka {
      brokers: "{{collectorKafkaBrokers}}"
      topic {
        good: "{{collectorKafkaTopicGoodName}}"
        bad: "{{collectorKafkaTopicBadName}}"
      }
    }

    buffer: {
      byte-limit: 4500000 # 4.5MB
      record-limit: 500 # 500 records
      time-limit: 60000 # 1 minute
    }
  }
}

akka {
  loglevel = DEBUG # 'OFF' for no logging, 'DEBUG' for all logging.
  loggers = ["akka.event.slf4j.Slf4jLogger"]
}

spray.can.server {
  remote-address-header = on
  uri-parsing-mode = relaxed
  raw-request-uri-header = on
  parsing {
    max-uri-length = 32768
  }
}
enricher.conf
enrich {
  source = "kinesis"
  sink = "kinesis"

  aws {
    access-key: "key"
    secret-key: "key"
  }

  kafka {
    brokers: "{{enrichKafkaBrokers}}"
  }

  streams {
    in: {
      raw: "good"
      maxRecords: 10000
      buffer: {
        byte-limit: 4500000
        record-limit: 500 # Not supported by Kafka; will be ignored
        time-limit: 60000
      }
    }
    out: {
      enriched: "enriched"
      bad: "bad"
      backoffPolicy: {
        minBackoff: 3000
        maxBackoff: 600000
      }
    }
    app-name: "enricher-app"
    initial-position = "TRIM_HORIZON"
    region: "us-west-2"
  }

  monitoring {
    snowplow {
      collector-uri: "xx.xx.xx.xx"
      collector-port: 80
      app-id: "collector-monitor"
      method: "GET"
    }
  }
}
resolver.json
{
  "schema": "iglu:com.snowplowanalytics.iglu/resolver-config/jsonschema/1-0-0",
  "data": {
    "cacheSize": 500,
    "repositories": [
      {
        "name": "Iglu Central",
        "priority": 0,
        "vendorPrefixes": [ "com.snowplowanalytics" ],
        "connection": {
          "http": {
            "uri": "http://iglucentral.com"
          }
        }
      }
    ]
  }
}
enrichment/anon_ip.json
{
  "schema": "iglu:com.snowplowanalytics.snowplow/anon_ip/jsonschema/1-0-0",
  "data": {
    "name": "anon_ip",
    "vendor": "com.snowplowanalytics.snowplow",
    "enabled": true,
    "parameters": {
      "anonOctets": 2
    }
  }
}
s3sink.conf
sink {
  aws {
    access-key: "key"
    secret-key: "key"
  }

  kinesis {
    in {
      stream-name: "good"
      initial-position: "TRIM_HORIZON"
      max-records: "1000"
    }
    out {
      stream-name: "bad"
    }
    region: "us-west-2"
    app-name: "s3-sink-app"
  }

  s3 {
    region: "us-west-2"
    endpoint: "http://s3-us-west-2.s3.amazonaws.com"
    bucket: "snowplow-logs-cnw/logs"
    max-timeout: "300000"
    format: "lzo"
  }

  buffer {
    byte-limit: 4500000
    record-limit: 500 # Not supported by Kafka; will be ignored
    time-limit: 60000
  }

  logging {
    level: "error"
  }

  monitoring {
    snowplow {
      collector-uri: "xx.xx.xx.xx"
      collector-port: 80
      app-id: "collector-monitor"
      method: "GET"
    }
  }
}
elasticsearch.conf
sink {
  source = "kinesis"

  sink {
    "good": "elasticsearch"
    "bad": "kinesis"
  }

  stream-type: "good"

  aws {
    access-key: "key"
    secret-key: "key"
  }

  kinesis {
    in {
      stream-name: "enriched" # Kinesis stream name
      initial-position: "TRIM_HORIZON"
      maxRecords: 10000
    }
    out {
      stream-name: "bad"
      shards: 1
    }
    region: "us-west-2"
    app-name: "elastics-app"
  }

  elasticsearch {
    client {
      type: "http"
      endpoint: "localhost"
      port: "9200"
      max-timeout: "7200"
      http {
        conn-timeout: "7200"
        read-timeout: "7200"
      }
    }
    cluster {
      name: "elasticsearch"
      index: "filebeat"
      type: "esnow"
    }
  }

  buffer {
    byte-limit: 4500000
    record-limit: 500 # Not supported by Kafka; will be ignored
    time-limit: 60000
  }

  monitoring {
    snowplow {
      collector-uri: "xx.xx.xx.xx"
      collector-port: 80
      app-id: "collector-monitor"
      method: "GET"
    }
  }
}