Hi there,
I’ve successfully set up a Dataflow job using Beam Enrich with the configuration below:
bin/beam-enrich --runner=DataFlowRunner --project=my-project-id --streaming=true --zone=asia-southeast1-a --gcpTempLocation=gs://snowplow-enrichment-temp/temp/ --raw=projects/my-project-id/subscriptions/raw-topic-subscription --enriched=projects/my-project-id/topics/enriched --bad=projects/my-project-id/topics/bad --resolver=/Users/jin/projects/my-folder/ads/snowplow-r114-polonnaruwa/3-enrich/config/iglu_resolver.json --enrichments=/Users/jin/projects/my-folder/ads/snowplow-r114-polonnaruwa/3-enrich/config/enrichments/ --workerMachineType=n1-standard-2 --job-name=snowplow-enrich --pii=projects/my-project-id/topics/pii
The Dataflow job is created, but it fails with an error when I send a request to the Collector (Scala Collector):
curl "http://192.168.0.100:8080/i?&e=pv&page=myPage&url=https%3A%2F%2Flive.page.tv&aid=11111&p=web&tv=no-js-0.1.0"
The error reported by Dataflow is:
com.google.common.util.concurrent.UncheckedExecutionException: java.lang.IllegalArgumentException: No filesystem found for scheme http
com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2214)
com.google.common.cache.LocalCache.get(LocalCache.java:4053)
com.google.common.cache.LocalCache.getAll(LocalCache.java:4116)
com.google.common.cache.LocalCache$LocalLoadingCache.getAll(LocalCache.java:5000)
com.spotify.scio.util.RemoteFileUtil.download(RemoteFileUtil.java:108)
com.spotify.scio.values.FileDistCache.prepareFiles(DistCache.scala:49)
com.spotify.scio.values.DistCacheMulti.init(DistCache.scala:96)
com.spotify.scio.values.FileDistCache.data$lzycompute(DistCache.scala:43)
com.spotify.scio.values.FileDistCache.data(DistCache.scala:43)
com.spotify.scio.values.FileDistCache.apply(DistCache.scala:39)
com.snowplowanalytics.snowplow.enrich.beam.Enrich$$anonfun$enrichEvents$1.apply(Enrich.scala:137)
com.snowplowanalytics.snowplow.enrich.beam.Enrich$$anonfun$enrichEvents$1.apply(Enrich.scala:136)
com.spotify.scio.util.Functions$$anon$7.processElement(Functions.scala:145)
Caused by: java.lang.IllegalArgumentException: No filesystem found for scheme http
org.apache.beam.sdk.io.FileSystems.getFileSystemInternal(FileSystems.java:459)
org.apache.beam.sdk.io.FileSystems.match(FileSystems.java:119)
org.apache.beam.sdk.io.FileSystems.matchSingleFileSpec(FileSystems.java:183)
com.spotify.scio.util.RemoteFileUtil.getMetadata(RemoteFileUtil.java:249)
com.spotify.scio.util.RemoteFileUtil.downloadImpl(RemoteFileUtil.java:169)
com.spotify.scio.util.RemoteFileUtil.access$000(RemoteFileUtil.java:52)
com.spotify.scio.util.RemoteFileUtil$1.load(RemoteFileUtil.java:66)
com.spotify.scio.util.RemoteFileUtil$1.load(RemoteFileUtil.java:63)
com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3628)
com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2336)
com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2295)
com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2208)
com.google.common.cache.LocalCache.get(LocalCache.java:4053)
com.google.common.cache.LocalCache.getAll(LocalCache.java:4116)
com.google.common.cache.LocalCache$LocalLoadingCache.getAll(LocalCache.java:5000)
com.spotify.scio.util.RemoteFileUtil.download(RemoteFileUtil.java:108)
com.spotify.scio.values.FileDistCache.prepareFiles(DistCache.scala:49)
com.spotify.scio.values.DistCacheMulti.init(DistCache.scala:96)
com.spotify.scio.values.FileDistCache.data$lzycompute(DistCache.scala:43)
com.spotify.scio.values.FileDistCache.data(DistCache.scala:43)
com.spotify.scio.values.FileDistCache.apply(DistCache.scala:39)
com.snowplowanalytics.snowplow.enrich.beam.Enrich$$anonfun$enrichEvents$1.apply(Enrich.scala:137)
com.snowplowanalytics.snowplow.enrich.beam.Enrich$$anonfun$enrichEvents$1.apply(Enrich.scala:136)
com.spotify.scio.util.Functions$$anon$7.processElement(Functions.scala:145)
com.spotify.scio.util.Functions$$anon$7$DoFnInvoker.invokeProcessElement(Unknown Source)
org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessElement(SimpleDoFnRunner.java:185)
org.apache.beam.runners.core.SimpleDoFnRunner.processElement(SimpleDoFnRunner.java:149)
com.google.cloud.dataflow.worker.SimpleParDoFn.processElement(SimpleParDoFn.java:323)
com.google.cloud.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:43)
com.google.cloud.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:48)
com.google.cloud.dataflow.worker.SimpleParDoFn$1.output(SimpleParDoFn.java:271)
org.apache.beam.runners.core.SimpleDoFnRunner.outputWindowedValue(SimpleDoFnRunner.java:219)
org.apache.beam.runners.core.SimpleDoFnRunner.access$700(SimpleDoFnRunner.java:69)
org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessContext.output(SimpleDoFnRunner.java:517)
org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessContext.output(SimpleDoFnRunner.java:505)
com.spotify.scio.util.Functions$$anon$7.processElement(Functions.scala:145)
com.spotify.scio.util.Functions$$anon$7$DoFnInvoker.invokeProcessElement(Unknown Source)
org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessElement(SimpleDoFnRunner.java:185)
org.apache.beam.runners.core.SimpleDoFnRunner.processElement(SimpleDoFnRunner.java:149)
com.google.cloud.dataflow.worker.SimpleParDoFn.processElement(SimpleParDoFn.java:323)
com.google.cloud.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:43)
com.google.cloud.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:48)
com.google.cloud.dataflow.worker.SimpleParDoFn$1.output(SimpleParDoFn.java:271)
org.apache.beam.runners.core.SimpleDoFnRunner.outputWindowedValue(SimpleDoFnRunner.java:219)
org.apache.beam.runners.core.SimpleDoFnRunner.access$700(SimpleDoFnRunner.java:69)
org.apache.beam.runners.core.SimpleDoFnRunner$DoFnProcessContext.output(SimpleDoFnRunner.java:517)
org.apache.beam.sdk.transforms.DoFnOutputReceivers$WindowedContextOutputReceiver.output(DoFnOutputReceivers.java:71)
org.apache.beam.sdk.transforms.MapElements$1.processElement(MapElements.java:128)
org.apache.beam.sdk.transforms.MapElements$1$DoFnInvoker.invokeProcessElement(Unknown Source)
org.apache.beam.runners.core.SimpleDoFnRunner.invokeProcessElement(SimpleDoFnRunner.java:185)
org.apache.beam.runners.core.SimpleDoFnRunner.processElement(SimpleDoFnRunner.java:149)
com.google.cloud.dataflow.worker.SimpleParDoFn.processElement(SimpleParDoFn.java:323)
com.google.cloud.dataflow.worker.util.common.worker.ParDoOperation.process(ParDoOperation.java:43)
com.google.cloud.dataflow.worker.util.common.worker.OutputReceiver.process(OutputReceiver.java:48)
com.google.cloud.dataflow.worker.util.common.worker.ReadOperation.runReadLoop(ReadOperation.java:200)
com.google.cloud.dataflow.worker.util.common.worker.ReadOperation.start(ReadOperation.java:158)
com.google.cloud.dataflow.worker.util.common.worker.MapTaskExecutor.execute(MapTaskExecutor.java:75)
com.google.cloud.dataflow.worker.StreamingDataflowWorker.process(StreamingDataflowWorker.java:1227)
com.google.cloud.dataflow.worker.StreamingDataflowWorker.access$1000(StreamingDataflowWorker.java:135)
com.google.cloud.dataflow.worker.StreamingDataflowWorker$6.run(StreamingDataflowWorker.java:966)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
java.lang.Thread.run(Thread.java:745)
What does this error mean, and how can I fix it?
Thanks in advance.