Hi Everyone,
We have a real-time data pipeline set up in AWS.
In the enrichment step we are using the PII enrichment to mask users' IP addresses.
In our raw-data-stream we can see the original IP address, and in the pii-stream we see the following data (some raw event fields, the parentEventId, the original IP and the modified value). I have changed the values for privacy.
myshop srv 2022-10-24 15:44:19.618 2022-10-24 15:44:16.569 pii_transformation 6c1bd7bc-fc63-44ce-adb8-3aa969d3dc0a snowplow-enrich-kinesis-3.0.0-rc44-common-3.0.0-rc44 {"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/parent_event/jsonschema/1-0-0","data":{"parentEventId":"6c1bd7bc-fc63-44ce-adb8-3aa969d3dc0a"}}]} {"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/pii_transformation/jsonschema/1-0-0","data":{"pii":{"pojo":[{"fieldName":"user_ipaddress","originalValue":"96.129.252.226","modifiedValue":"103214af657195533590528b7d068b757e8a61e32e90bafdf9d275338f7f22c4"}]},"strategy":{"pseudonymize":{"hashFunction":"SHA-256"}}}}} 2022-10-24 15:44:19.628 com.snowplowanalytics.snowplow pii_transformation jsonschema 1-0-0 2022-10-24 15:44:19.628
myshop srv 2022-10-24 15:44:21.137 2022-10-24 15:44:18.411 pii_transformation 847d0b68-305e-4f5d-9f43-e170c35950b7 snowplow-enrich-kinesis-3.0.0-rc44-common-3.0.0-rc44 {"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/parent_event/jsonschema/1-0-0","data":{"parentEventId":"847d0b68-305e-4f5d-9f43-e170c35950b7"}}]} {"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/pii_transformation/jsonschema/1-0-0","data":{"pii":{"pojo":[{"fieldName":"user_ipaddress","originalValue":"96.129.252.226","modifiedValue":"103214af657195533590528b7d068b757e8a61e32e90bafdf9d275338f7f22c4"}]},"strategy":{"pseudonymize":{"hashFunction":"SHA-256"}}}}} 2022-10-24 15:44:21.150 com.snowplowanalytics.snowplow pii_transformation jsonschema 1-0-0 2022-10-24 15:44:21.150
myshop srv 2022-10-24 15:44:21.137 2022-10-24 15:44:18.282 pii_transformation 3e862dca-3cff-4b0b-b60f-52682e974e37 snowplow-enrich-kinesis-3.0.0-rc44-common-3.0.0-rc44 {"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/parent_event/jsonschema/1-0-0","data":{"parentEventId":"3e862dca-3cff-4b0b-b60f-52682e974e37"}}]} {"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/pii_transformation/jsonschema/1-0-0","data":{"pii":{"pojo":[{"fieldName":"user_ipaddress","originalValue":"96.129.252.226","modifiedValue":"103214af657195533590528b7d068b757e8a61e32e90bafdf9d275338f7f22c4"}]},"strategy":{"pseudonymize":{"hashFunction":"SHA-256"}}}}} 2022-10-24 15:44:21.150 com.snowplowanalytics.snowplow pii_transformation jsonschema 1-0-0 2022-10-24 15:44:21.150
myshop srv 2022-10-24 15:44:21.130 2022-10-24 15:44:16.568 pii_transformation b9ecba1c-9bf2-4a13-97fd-e90c2274d777 snowplow-enrich-kinesis-3.0.0-rc44-common-3.0.0-rc44 {"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/parent_event/jsonschema/1-0-0","data":{"parentEventId":"b9ecba1c-9bf2-4a13-97fd-e90c2274d777"}}]} {"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/pii_transformation/jsonschema/1-0-0","data":{"pii":{"pojo":[{"fieldName":"user_ipaddress","originalValue":"96.129.252.226","modifiedValue":"103214af657195533590528b7d068b757e8a61e32e90bafdf9d275338f7f22c4"}]},"strategy":{"pseudonymize":{"hashFunction":"SHA-256"}}}}} 2022-10-24 15:44:21.150 com.snowplowanalytics.snowplow pii_transformation jsonschema 1-0-0 2022-10-24 15:44:21.150
myshop srv 2022-10-24 15:44:21.130 2022-10-24 15:44:16.566 pii_transformation 875f132d-297d-4538-9c89-8fadce34f9de snowplow-enrich-kinesis-3.0.0-rc44-common-3.0.0-rc44 {"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-0","data":[{"schema":"iglu:com.snowplowanalytics.snowplow/parent_event/jsonschema/1-0-0","data":{"parentEventId":"875f132d-297d-4538-9c89-8fadce34f9de"}}]}
I can see that the data above contains a parentEventId, which must come from the raw event in the raw stream, but I cannot find any of these IDs in the raw data from the collector or in the final data in the enriched stream.
Here is my PII enrichment config:
{
"schema": "iglu:com.snowplowanalytics.snowplow.enrichments/pii_enrichment_config/jsonschema/2-0-0",
"data": {
"vendor": "com.snowplowanalytics.snowplow.enrichments",
"name": "pii_enrichment_config",
"emitEvent": true,
"enabled": true,
"parameters": {
"pii": [
{
"pojo": {
"field": "user_ipaddress"
}
},
{
"pojo": {
"field": "user_fingerprint"
}
}
],
"strategy": {
"pseudonymize": {
"hashFunction": "SHA-256",
"salt": "mySuperSecretSaltValue"
}
}
}
}
}
I want to find the parent event from which each of these PII enrichment rows originated.
I hope this is clear; if not, please let me know and I will try to rephrase.