Skip to content

Commit f63fa3d

Browse files
committed
Loader: add telemetry (close #617)
1 parent 3b7444a commit f63fa3d

File tree

23 files changed

+350
-104
lines changed

23 files changed

+350
-104
lines changed

config/loader/aws/databricks.config.reference.hocon

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,4 +227,34 @@
227227
# the next time it will get this (or anything) from a queue has this delay
228228
"sqsVisibility": "5 minutes"
229229
}
230+
231+
# Optional. Configure telemetry
232+
# All the fields are optional
233+
"telemetry": {
234+
# Set to true to disable telemetry
235+
"disable": false
236+
# Interval for the heartbeat event
237+
"interval": 15 minutes
238+
# HTTP method used to send the heartbeat event
239+
"method": "POST"
240+
# URI of the collector receiving the heartbeat event
241+
"collectorUri": "collector-g.snowplowanalytics.com"
242+
# Port of the collector receiving the heartbeat event
243+
"collectorPort": 443
244+
# Whether to use https or not
245+
"secure": true
246+
# Identifier intended to tie events together across modules,
247+
# infrastructure and apps when used consistently
248+
"userProvidedId": "my_pipeline"
249+
# ID automatically generated upon running a module's deployment script
250+
# Intended to identify each independent module, and the infrastructure it controls
251+
"autoGeneratedId": "hfy67e5ydhtrd"
252+
# Unique identifier for the VM instance
253+
# Unique for each instance of the app running within a module
254+
"instanceId": "665bhft5u6udjf"
255+
# Name of the terraform module that deployed the app
256+
"moduleName": "rdb-loader-ce"
257+
# Version of the terraform module that deployed the app
258+
"moduleVersion": "1.0.0"
259+
}
230260
}

config/loader/aws/redshift.config.reference.hocon

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,4 +204,34 @@
204204
# the next time it will get this (or anything) from a queue has this delay
205205
"sqsVisibility": "5 minutes"
206206
}
207+
208+
# Optional. Configure telemetry
209+
# All the fields are optional
210+
"telemetry": {
211+
# Set to true to disable telemetry
212+
"disable": false
213+
# Interval for the heartbeat event
214+
"interval": 15 minutes
215+
# HTTP method used to send the heartbeat event
216+
"method": "POST"
217+
# URI of the collector receiving the heartbeat event
218+
"collectorUri": "collector-g.snowplowanalytics.com"
219+
# Port of the collector receiving the heartbeat event
220+
"collectorPort": 443
221+
# Whether to use https or not
222+
"secure": true
223+
# Identifier intended to tie events together across modules,
224+
# infrastructure and apps when used consistently
225+
"userProvidedId": "my_pipeline"
226+
# ID automatically generated upon running a module's deployment script
227+
# Intended to identify each independent module, and the infrastructure it controls
228+
"autoGeneratedId": "hfy67e5ydhtrd"
229+
# Unique identifier for the VM instance
230+
# Unique for each instance of the app running within a module
231+
"instanceId": "665bhft5u6udjf"
232+
# Name of the terraform module that deployed the app
233+
"moduleName": "rdb-loader-ce"
234+
# Version of the terraform module that deployed the app
235+
"moduleVersion": "1.0.0"
236+
}
207237
}

config/loader/aws/snowflake.config.reference.hocon

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,4 +242,34 @@
242242
# the next time it will get this (or anything) from a queue has this delay
243243
"sqsVisibility": "5 minutes"
244244
}
245+
246+
# Optional. Configure telemetry
247+
# All the fields are optional
248+
"telemetry": {
249+
# Set to true to disable telemetry
250+
"disable": false
251+
# Interval for the heartbeat event
252+
"interval": 15 minutes
253+
# HTTP method used to send the heartbeat event
254+
"method": "POST"
255+
# URI of the collector receiving the heartbeat event
256+
"collectorUri": "collector-g.snowplowanalytics.com"
257+
# Port of the collector receiving the heartbeat event
258+
"collectorPort": 443
259+
# Whether to use https or not
260+
"secure": true
261+
# Identifier intended to tie events together across modules,
262+
# infrastructure and apps when used consistently
263+
"userProvidedId": "my_pipeline"
264+
# ID automatically generated upon running a module's deployment script
265+
# Intended to identify each independent module, and the infrastructure it controls
266+
"autoGeneratedId": "hfy67e5ydhtrd"
267+
# Unique identifier for the VM instance
268+
# Unique for each instance of the app running within a module
269+
"instanceId": "665bhft5u6udjf"
270+
# Name of the terraform module that deployed the app
271+
"moduleName": "rdb-loader-ce"
272+
# Version of the terraform module that deployed the app
273+
"moduleVersion": "1.0.0"
274+
}
245275
}

config/loader/gcp/snowflake.config.reference.hocon

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,4 +208,34 @@
208208
# before considering Snowflake unhealthy
209209
"nonLoading": "10 minutes"
210210
}
211+
212+
# Optional. Configure telemetry
213+
# All the fields are optional
214+
"telemetry": {
215+
# Set to true to disable telemetry
216+
"disable": false
217+
# Interval for the heartbeat event
218+
"interval": 15 minutes
219+
# HTTP method used to send the heartbeat event
220+
"method": "POST"
221+
# URI of the collector receiving the heartbeat event
222+
"collectorUri": "collector-g.snowplowanalytics.com"
223+
# Port of the collector receiving the heartbeat event
224+
"collectorPort": 443
225+
# Whether to use https or not
226+
"secure": true
227+
# Identifier intended to tie events together across modules,
228+
# infrastructure and apps when used consistently
229+
"userProvidedId": "my_pipeline"
230+
# ID automatically generated upon running a module's deployment script
231+
# Intended to identify each independent module, and the infrastructure it controls
232+
"autoGeneratedId": "hfy67e5ydhtrd"
233+
# Unique identifier for the VM instance
234+
# Unique for each instance of the app running within a module
235+
"instanceId": "665bhft5u6udjf"
236+
# Name of the terraform module that deployed the app
237+
"moduleName": "rdb-loader-ce"
238+
# Version of the terraform module that deployed the app
239+
"moduleVersion": "1.0.0"
240+
}
211241
}

modules/common-transformer-stream/src/main/scala/com/snowplowanalytics/snowplow/rdbloader/transformer/stream/common/Config.scala

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import io.circe.generic.semiauto._
2323

2424
import scala.concurrent.duration.{Duration, FiniteDuration}
2525

26+
import com.snowplowanalytics.snowplow.rdbloader.common.telemetry.Telemetry
2627
import com.snowplowanalytics.snowplow.rdbloader.common.config.{ConfigUtils, TransformerConfig}
2728
import com.snowplowanalytics.snowplow.rdbloader.common.config.implicits._
2829
import com.snowplowanalytics.snowplow.rdbloader.common.config.TransformerConfig.Compression
@@ -35,7 +36,7 @@ final case class Config(input: Config.StreamInput,
3536
queue: Config.QueueConfig,
3637
formats: TransformerConfig.Formats,
3738
monitoring: Config.Monitoring,
38-
telemetry: Config.Telemetry,
39+
telemetry: Telemetry.Config,
3940
featureFlags: TransformerConfig.FeatureFlags,
4041
validations: TransformerConfig.Validations)
4142

@@ -155,23 +156,6 @@ object Config {
155156
deriveDecoder[MetricsReporters]
156157
}
157158

158-
case class Telemetry(
159-
disable: Boolean,
160-
interval: FiniteDuration,
161-
method: String,
162-
collectorUri: String,
163-
collectorPort: Int,
164-
secure: Boolean,
165-
userProvidedId: Option[String],
166-
autoGeneratedId: Option[String],
167-
instanceId: Option[String],
168-
moduleName: Option[String],
169-
moduleVersion: Option[String]
170-
)
171-
172-
implicit val telemetryDecoder: Decoder[Telemetry] =
173-
deriveDecoder[Telemetry]
174-
175159
trait Decoders extends TransformerConfig.Decoders {
176160

177161
implicit val streamInputConfigDecoder: Decoder[StreamInput] =

modules/common-transformer-stream/src/main/scala/com/snowplowanalytics/snowplow/rdbloader/transformer/stream/common/Resources.scala

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,16 @@ import io.circe.Json
2424
import cats.implicits._
2525
import cats.effect._
2626

27+
import org.http4s.client.blaze.BlazeClientBuilder
28+
2729
import com.snowplowanalytics.iglu.client.Client
2830
import com.snowplowanalytics.iglu.client.resolver.{InitListCache, InitSchemaCache, Resolver}
2931

3032
import com.snowplowanalytics.snowplow.rdbloader.common.cloud.{Queue, BlobStorage}
33+
import com.snowplowanalytics.snowplow.rdbloader.common.telemetry.Telemetry
3134
import com.snowplowanalytics.snowplow.rdbloader.common.transformation.EventUtils
3235

3336
import com.snowplowanalytics.snowplow.rdbloader.transformer.stream.common.metrics.Metrics
34-
import com.snowplowanalytics.snowplow.rdbloader.transformer.stream.common.telemetry.Telemetry
3537
import com.snowplowanalytics.snowplow.rdbloader.transformer.stream.common.sources.Checkpointer
3638

3739
case class Resources[F[_], C](
@@ -72,7 +74,16 @@ object Resources {
7274
instanceId <- mkTransformerInstanceId
7375
blocker <- Blocker[F]
7476
metrics <- Resource.eval(Metrics.build[F](blocker, config.monitoring.metrics))
75-
telemetry <- Telemetry.build[F](config, buildName, buildVersion, executionContext)
77+
httpClient <- BlazeClientBuilder[F](executionContext).resource
78+
telemetry <- Telemetry.build[F](
79+
config.telemetry,
80+
buildName,
81+
buildVersion,
82+
httpClient,
83+
AppId.appId,
84+
getRegionFromConfig(config),
85+
getCloudFromConfig(config)
86+
)
7687
inputStream <- mkSource(blocker, config.input, config.monitoring)
7788
blobStorage <- mkSink(blocker, config.output)
7889
} yield
@@ -112,4 +123,17 @@ object Resources {
112123
.eval(Sync[F].delay(UUID.randomUUID()))
113124
.evalTap(id => logger.info(s"Instantiated $id shredder instance"))
114125
}
126+
127+
/**
 * Extracts the region name from the transformer's input configuration.
 *
 * Only a Kinesis input yields a value (taken from `c.region.name`);
 * every other input kind results in `None`.
 *
 * @param config the transformer configuration whose `input` is inspected
 * @return the region name for a Kinesis input, otherwise `None`
 */
private def getRegionFromConfig(config: Config): Option[String] =
128+
config.input match {
129+
case c: Config.StreamInput.Kinesis => Some(c.region.name)
130+
case _ => None
131+
}
132+
133+
/**
 * Maps the kind of input in the configuration to a `Telemetry.Cloud` value.
 *
 * A Kinesis input maps to `Telemetry.Cloud.Aws`, a Pubsub input maps to
 * `Telemetry.Cloud.Gcp`, and any other input kind results in `None`.
 *
 * @param config the transformer configuration whose `input` is inspected
 * @return the cloud matching the input kind, or `None` when it is neither Kinesis nor Pubsub
 */
private def getCloudFromConfig(config: Config): Option[Telemetry.Cloud] =
134+
config.input match {
135+
case _: Config.StreamInput.Kinesis => Some(Telemetry.Cloud.Aws)
136+
case _: Config.StreamInput.Pubsub => Some(Telemetry.Cloud.Gcp)
137+
case _ => None
138+
}
115139
}

0 commit comments

Comments
 (0)