Hello,
My BigQuery events table has been empty for the last 3 days. The issue seems to be somewhere around the collector, but I can’t really figure out what’s wrong.
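For reference, this is roughly the check I run to confirm that nothing has landed (the dataset and table names here are placeholders, mine are different):
# Latest event loaded into BigQuery (dataset/table names are placeholders)
bq query --use_legacy_sql=false 'SELECT MAX(collector_tstamp) FROM `my-sp-project-xxxx.snowplow.events`'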
When looking at the performance of my collector VM, I’m not seeing anything wrong.
When looking at the subscription to the “good” Pub/Sub topic, I can clearly see that something is coming through.
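To double-check, I also pulled a message from it directly with something like this (the subscription name is a placeholder for the one attached to my loader):
# Peek at one message on the subscription without acking it (subscription name is a placeholder)
gcloud pubsub subscriptions pull good-sub --project=my-sp-project-xxxx --limit=1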
Looking at the logs of my collector, I see this kind of error:
[scala-stream-collector-akka.actor.default-dispatcher-8] WARN akka.actor.ActorSystemImpl - Illegal header: Illegal 'user-agent' header: Invalid input ',', expected OWS, 'EOI', tchar, product-or-comment or comment (line 1, column 102): Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6,2 Safari/605.1.15
but I’ve always had errors like this…
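Since the container logs go to Cloud Logging via the gcplogs driver (see the docker run command below), this is roughly how I pull them (the filter is just a free-text search I use, adjust as needed):
# Search recent GCE instance logs for the collector warning
gcloud logging read 'resource.type="gce_instance" AND "Illegal header"' --project=my-sp-project-xxxx --limit=20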
Here is the config of the collector:
# startup-script from quick start guide
#!/bin/bash
set -e -x
# -----------------------------------------------------------------------------
# BASE INSTALL
# -----------------------------------------------------------------------------
readonly CONFIG_DIR=/opt/snowplow/config
function install_base_packages() {
  sudo apt install wget curl unzip -y
}
function install_docker_ce() {
  sudo apt install docker.io -y
  sudo systemctl enable --now docker
}
sudo apt update -y
install_base_packages
install_docker_ce
sudo mkdir -p ${CONFIG_DIR}
sudo cat << EOF > ${CONFIG_DIR}/collector.hocon
collector {
  interface = "0.0.0.0"
  port = 8080

  ssl {
    enable = false
    redirect = false
    port = 8443
  }

  paths {}

  p3p {
    policyRef = "/w3c/p3p.xml"
    CP = "NOI DSP COR NID PSA OUR IND COM NAV STA"
  }

  crossDomain {
    enabled = false
    domains = [ "*" ]
    secure = true
  }

  cookie {
    enabled = true
    expiration = "365 days"
    name = sp
    domains = []
    fallbackDomain = ""
    secure = true
    httpOnly = false
    sameSite = "None"
  }

  doNotTrackCookie {
    enabled = false
    name = ""
    value = ""
  }

  cookieBounce {
    enabled = false
    name = "n3pc"
    fallbackNetworkUserId = "00000000-0000-4000-A000-000000000000"
    forwardedProtocolHeader = "X-Forwarded-Proto"
  }

  enableDefaultRedirect = false

  redirectMacro {
    enabled = false
    placeholder = "[TOKEN]"
  }

  rootResponse {
    enabled = false
    statusCode = 302
    headers = {}
    body = "302, redirecting"
  }

  cors {
    accessControlMaxAge = "5 seconds"
  }

  prometheusMetrics {
    enabled = false
  }

  streams {
    good = good
    bad = bad
    useIpAddressAsPartitionKey = false

    sink {
      enabled = google-pub-sub
      googleProjectId = "my-sp-project-xxxx"

      backoffPolicy {
        minBackoff = 1000
        maxBackoff = 1000
        totalBackoff = 10000
        multiplier = 1
      }
    }

    buffer {
      byteLimit = 1000000
      recordLimit = 500
      timeLimit = 500
    }
  }
}

akka {
  loglevel = WARNING
  loggers = ["akka.event.slf4j.Slf4jLogger"]

  http.server {
    remote-address-header = on
    raw-request-uri-header = on

    parsing {
      max-uri-length = 32768
      uri-parsing-mode = relaxed
    }

    max-connections = 2048
  }
}
EOF
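For completeness, the good/bad names in the streams block above are the Pub/Sub topic names; this is how I list what actually exists in the project:
# List the Pub/Sub topics in the project to confirm "good" and "bad" are there
gcloud pubsub topics list --project=my-sp-project-xxxx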
sudo docker run \
  -d \
  --name collector \
  --restart always \
  --network host \
  --log-driver gcplogs \
  -v ${CONFIG_DIR}:/snowplow/config \
  -p 8080:8080 \
  -e 'JAVA_OPTS=-Dorg.slf4j.simpleLogger.defaultLogLevel=info' \
  snowplow/scala-stream-collector-pubsub:2.4.5 \
  --config /snowplow/config/collector.hocon
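In case it helps, this is roughly how I check that the collector itself is still up and accepting hits (/health and /i are the collector’s standard endpoints; the IP is a placeholder for my VM’s address):
# The collector should answer 200 OK on its health endpoint
curl -i http://<collector-vm-ip>:8080/health
# A minimal pixel hit; the collector should return the 1x1 gif and publish a raw event to the "good" topic
curl -i "http://<collector-vm-ip>:8080/i?e=pv"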
Any idea what’s wrong?