Skip to content

Commit 6bc601b

Browse files
committed
Split lzo serializers into a separate sbt project (close #261)
1 parent 9d32ebb commit 6bc601b

35 files changed

+137
-51
lines changed

.github/workflows/lacework.yml

+7
Original file line number | Diff line number | Diff line change
@@ -35,3 +35,10 @@ jobs:
3535
LW_ACCOUNT_NAME: ${{ secrets.LW_ACCOUNT_NAME }}
3636
LW_SCANNER_SAVE_RESULTS: ${{ !contains(steps.version.outputs.tag, 'rc') }}
3737
run: ./lw-scanner image evaluate snowplow/snowplow-s3-loader ${{ steps.ver.outputs.tag }} --build-id ${{ github.run_id }} --no-pull
38+
39+
- name: Scan snowplow-s3-loader lzo
40+
env:
41+
LW_ACCESS_TOKEN: ${{ secrets.LW_ACCESS_TOKEN }}
42+
LW_ACCOUNT_NAME: ${{ secrets.LW_ACCOUNT_NAME }}
43+
LW_SCANNER_SAVE_RESULTS: ${{ !contains(steps.version.outputs.tag, 'rc') }}
44+
run: ./lw-scanner image evaluate snowplow/snowplow-s3-loader ${{ steps.ver.outputs.tag }}-lzo --build-id ${{ github.run_id }} --no-pull

.github/workflows/test_and_publish.yml

+12-13
Original file line number | Diff line number | Diff line change
@@ -24,23 +24,21 @@ jobs:
2424
- name: Check formatting
2525
run: sbt scalafmtCheck
2626

27-
- name: Publish to Docker Hub
27+
- name: Login to Docker Hub
2828
if: startsWith(github.ref, 'refs/tags/')
29-
run: |
30-
docker login -u $DOCKER_USERNAME -p $DOCKER_PASSWORD
31-
project_version=$(sbt version -Dsbt.log.noformat=true | perl -ne 'print "$1\n" if /info.*(\d+\.\d+\.\d+[^\r\n]*)/' | tail -n 1 | tr -d '\n')
32-
if [[ "${{ github.ref }}" = "refs/tags/${project_version}" ]]
33-
then
34-
echo Publishing to Docker Hub
35-
sbt docker:publish
36-
else
37-
echo "${{ github.ref }} does not match project version $project_version => not publishing"
38-
exit 1
39-
fi
29+
run: docker login -u $DOCKER_USERNAME -p $DOCKER_PASSWORD
4030
env:
4131
DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
4232
DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
4333

34+
- name: Publish to Docker Hub
35+
if: startsWith(github.ref, 'refs/tags/')
36+
run: sbt 'project main' docker:publish
37+
38+
- name: Publish to Docker Hub lzo
39+
if: startsWith(github.ref, 'refs/tags/')
40+
run: sbt 'project lzo' docker:publish
41+
4442
- name: Build artifacts
4543
run: |
4644
sbt assembly
@@ -57,7 +55,8 @@ jobs:
5755
name: Version ${{ steps.ver.outputs.project_version }}
5856
tag_name: ${{ steps.ver.outputs.project_version }}
5957
files: |
60-
target/scala-2.13/snowplow-s3-loader-${{ steps.ver.outputs.project_version }}.jar
58+
modules/main/target/scala-2.13/snowplow-s3-loader-${{ steps.ver.outputs.project_version }}.jar
59+
modules/lzo/target/scala-2.13/snowplow-s3-loader-lzo-${{ steps.ver.outputs.project_version }}.jar
6160
env:
6261
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
6362

build.sbt

+26-14
Original file line number | Diff line number | Diff line change
@@ -12,32 +12,25 @@
1212
* See the Apache License Version 2.0 for the specific language governing permissions and
1313
* limitations there under.
1414
*/
15+
1516
lazy val root = project.in(file("."))
17+
.aggregate(main, lzo)
18+
19+
lazy val main = project.in(file("modules/main"))
1620
.settings(
17-
name := "snowplow-s3-loader",
18-
description := "Load the contents of a Kinesis stream topic to S3"
21+
name := "snowplow-s3-loader",
1922
)
20-
.settings(BuildSettings.basicSettings)
21-
.settings(BuildSettings.scalifySettings)
22-
.settings(BuildSettings.sbtAssemblySettings)
23-
.settings(BuildSettings.dockerSettings)
24-
.settings(BuildSettings.addExampleConfToTestCp)
23+
.settings(BuildSettings.commonSettings)
2524
.settings(
2625
libraryDependencies ++= Seq(
2726
// Java
2827
Dependencies.Libraries.kinesisClient,
2928
Dependencies.Libraries.kinesisConnector,
3029
Dependencies.Libraries.slf4j,
3130
Dependencies.Libraries.jclOverSlf4j,
32-
Dependencies.Libraries.hadoop,
33-
Dependencies.Libraries.elephantbird,
34-
Dependencies.Libraries.hadoopLZO,
35-
Dependencies.Libraries.apacheCommons,
3631
Dependencies.Libraries.jackson,
3732
Dependencies.Libraries.jacksonCbor,
38-
Dependencies.Libraries.thrift,
3933
Dependencies.Libraries.sentry,
40-
Dependencies.Libraries.collections,
4134
Dependencies.Libraries.jaxbApi,
4235
Dependencies.Libraries.protobuf,
4336
// Scala
@@ -50,10 +43,29 @@ lazy val root = project.in(file("."))
5043
// Scala (test only)
5144
Dependencies.Libraries.specs2,
5245
// Thrift (test only)
53-
Dependencies.Libraries.collectorPayload
46+
Dependencies.Libraries.collectorPayload,
47+
Dependencies.Libraries.thrift % Test,
5448
),
5549
excludeDependencies += "commons-logging" % "commons-logging"
5650
)
5751
.enablePlugins(JavaAppPackaging, DockerPlugin)
5852

53+
lazy val lzo = project.in(file("modules/lzo"))
54+
.settings(
55+
name := "snowplow-s3-loader-lzo",
56+
)
57+
.settings(BuildSettings.commonSettings)
58+
.settings(BuildSettings.lzoSettings)
59+
.settings(
60+
libraryDependencies ++= Seq(
61+
Dependencies.Libraries.hadoop,
62+
Dependencies.Libraries.elephantbird,
63+
Dependencies.Libraries.hadoopLZO,
64+
Dependencies.Libraries.thrift,
65+
Dependencies.Libraries.collections,
66+
)
67+
)
68+
.dependsOn(main % "compile->compile; test->test")
69+
.enablePlugins(JavaAppPackaging, DockerPlugin)
70+
5971
shellPrompt := { _ => "s3-loader> " }

src/main/scala/com/snowplowanalytics/s3/loader/serializers/LzoSerializer.scala renamed to modules/lzo/src/main/scala/com.snowplowanalytics.s3.loader.lzo/LzoSerializer.scala

+4-2
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,7 @@
1010
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1111
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
1212
*/
13-
package com.snowplowanalytics.s3.loader
14-
package serializers
13+
package com.snowplowanalytics.s3.loader.lzo
1514

1615
// Java libs
1716
import java.io.{ByteArrayOutputStream, DataOutputStream}
@@ -23,6 +22,9 @@ import com.hadoop.compression.lzo.LzopCodec
2322
// Elephant bird
2423
import com.twitter.elephantbird.mapreduce.io.RawBlockWriter
2524

25+
import com.snowplowanalytics.s3.loader.Result
26+
import com.snowplowanalytics.s3.loader.serializers.ISerializer
27+
2628
/**
2729
* Object to handle LZO compression of raw events
2830
*/
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,20 @@
1+
/*
2+
* Copyright (c) 2014-2022 Snowplow Analytics Ltd. All rights reserved.
3+
*
4+
* This program is licensed to you under the Apache License Version 2.0,
5+
* and you may not use this file except in compliance with the Apache License Version 2.0.
6+
* You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
7+
*
8+
* Unless required by applicable law or agreed to in writing,
9+
* software distributed under the Apache License Version 2.0 is distributed on an
10+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
12+
*/
13+
package com.snowplowanalytics.s3.loader.lzo
14+
15+
import com.snowplowanalytics.s3.loader.MainPlatform
16+
17+
object Main extends MainPlatform {
18+
def main(args: Array[String]): Unit =
19+
withConfig(args)(S3LoaderWithLzo.run)
20+
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
1+
/*
2+
* Copyright (c) 2014-2022 Snowplow Analytics Ltd. All rights reserved.
3+
*
4+
* This program is licensed to you under the Apache License Version 2.0,
5+
* and you may not use this file except in compliance with the Apache License Version 2.0.
6+
* You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
7+
*
8+
* Unless required by applicable law or agreed to in writing,
9+
* software distributed under the Apache License Version 2.0 is distributed on an
10+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
12+
*/
13+
package com.snowplowanalytics.s3.loader.lzo
14+
15+
import com.snowplowanalytics.s3.loader.{Config, S3Loader}
16+
import com.snowplowanalytics.s3.loader.serializers.{GZipSerializer, ISerializer}
17+
18+
object S3LoaderWithLzo extends S3Loader {
19+
20+
override def serializer(config: Config): ISerializer =
21+
config.output.s3.compression match {
22+
case Config.Compression.Lzo => LzoSerializer
23+
case Config.Compression.Gzip => GZipSerializer
24+
}
25+
26+
}

src/test/scala/com/snowplowanalytics/s3/loader/serializers/LzoSerializerSpec.scala renamed to modules/lzo/src/test/scala/com.snowplowanalytics.s3.loader.lzo/LzoSerializerSpec.scala

+1-1
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1111
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
1212
*/
13-
package com.snowplowanalytics.s3.loader.serializers
13+
package com.snowplowanalytics.s3.loader.lzo
1414

1515
// Java
1616
import java.io.{BufferedInputStream, File, FileInputStream, FileOutputStream}

src/main/scala/com/snowplowanalytics/s3/loader/Main.scala renamed to modules/main/src/main/scala/com/snowplowanalytics/s3/loader/Main.scala

+8-3
Original file line number | Diff line number | Diff line change
@@ -23,19 +23,19 @@ import cats.syntax.show._
2323
/**
2424
* The entrypoint class for the Kinesis-S3 Sink application.
2525
*/
26-
object Main {
26+
trait MainPlatform {
2727

2828
val config = Opts
2929
.option[Path]("config", "Path to configuration HOCON file", "c", "filename")
3030
val parser =
3131
Command(s"${generated.Settings.name}-${generated.Settings.version}", "Streaming sink app for S3")(config)
3232

33-
def main(args: Array[String]): Unit =
33+
def withConfig(args: Array[String])(f: Config => Unit): Unit =
3434
parser.parse(args.toList) match {
3535
case Right(c) =>
3636
Config.load(c) match {
3737
case Right(config) =>
38-
S3Loader.run(config)
38+
f(config)
3939
case Left(e) =>
4040
System.err.println(s"Configuration error: $e")
4141
System.exit(1)
@@ -45,3 +45,8 @@ object Main {
4545
System.exit(1)
4646
}
4747
}
48+
49+
object Main extends MainPlatform {
50+
def main(args: Array[String]): Unit =
51+
withConfig(args)(S3Loader.run)
52+
}

src/main/scala/com/snowplowanalytics/s3/loader/S3Loader.scala renamed to modules/main/src/main/scala/com/snowplowanalytics/s3/loader/S3Loader.scala

+11-8
Original file line number | Diff line number | Diff line change
@@ -24,25 +24,26 @@ import com.snowplowanalytics.snowplow.badrows.Processor
2424
import com.snowplowanalytics.s3.loader.Config.Compression
2525
import com.snowplowanalytics.s3.loader.connector.KinesisSourceExecutor
2626
import com.snowplowanalytics.s3.loader.monitoring.Monitoring
27-
import com.snowplowanalytics.s3.loader.serializers.{GZipSerializer, LzoSerializer}
27+
import com.snowplowanalytics.s3.loader.serializers.{GZipSerializer, ISerializer}
2828

29-
object S3Loader {
29+
trait S3Loader {
3030

3131
val logger = LoggerFactory.getLogger(getClass)
3232

3333
val processor = Processor(generated.Settings.name, generated.Settings.version)
3434

35+
def serializer(config: Config): ISerializer =
36+
config.output.s3.compression match {
37+
case Compression.Gzip => GZipSerializer
38+
case Compression.Lzo => throw new IllegalArgumentException("This build of S3 loader does not support LZO compression")
39+
}
40+
3541
def run(config: Config): Unit = {
3642
val monitoring = Monitoring.build(config.monitoring)
3743

3844
// A sink for records that could not be emitted to S3
3945
val badSink = KinesisSink.build(config, monitoring)
4046

41-
val serializer = config.output.s3.compression match {
42-
case Compression.Lzo => LzoSerializer
43-
case Compression.Gzip => GZipSerializer
44-
}
45-
4647
val executor =
4748
new KinesisSourceExecutor(
4849
config.region,
@@ -51,7 +52,7 @@ object S3Loader {
5152
config.purpose,
5253
config.output,
5354
badSink,
54-
serializer,
55+
serializer(config),
5556
monitoring,
5657
config.monitoring
5758
.flatMap(_.metrics.flatMap(_.cloudWatch))
@@ -122,3 +123,5 @@ object S3Loader {
122123
new KinesisConnectorConfiguration(props, credentialsProvider)
123124
}
124125
}
126+
127+
object S3Loader extends S3Loader

project/BuildSettings.scala

+19-5
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,9 @@ object BuildSettings {
3232

3333
// Basic settings for our app
3434
lazy val basicSettings = Seq(
35-
organization := "com.snowplowanalytics",
36-
scalaVersion := "2.13.6",
35+
organization := "com.snowplowanalytics",
36+
scalaVersion := "2.13.6",
37+
description := "Load the contents of a Kinesis stream topic to S3",
3738
resolvers ++= Dependencies.resolvers,
3839
ThisBuild / dynverVTagPrefix := false,
3940
ThisBuild / dynverSeparator := "-"
@@ -42,7 +43,7 @@ object BuildSettings {
4243
/** Add example config for integration tests */
4344
lazy val addExampleConfToTestCp = Seq(
4445
Test / unmanagedClasspath += {
45-
baseDirectory.value / "config"
46+
baseDirectory.value / "../../config"
4647
}
4748
)
4849

@@ -52,11 +53,15 @@ object BuildSettings {
5253
Docker / packageName := "snowplow/snowplow-s3-loader",
5354
dockerBaseImage := "eclipse-temurin:11-jre-focal",
5455
dockerUpdateLatest := true,
56+
)
57+
58+
lazy val lzoDockerSettings = Seq(
5559
dockerCommands := {
5660
val installLzo = Seq(Cmd("RUN", "mkdir -p /var/lib/apt/lists/partial && apt-get update && apt-get install -y lzop && apt-get purge -y"))
5761
val (h, t) = dockerCommands.value.splitAt(dockerCommands.value.size-4)
5862
h ++ installLzo ++ t
59-
}
63+
},
64+
dockerAlias := dockerAlias.value.withTag(Some(version.value + "-lzo"))
6065
)
6166

6267
// Makes our SBT app settings available from within the app
@@ -82,10 +87,12 @@ object BuildSettings {
8287
case PathList("javax", "servlet", xs @ _*) => MergeStrategy.first
8388
case PathList("org", "objectweb", "asm", xs @ _*) => MergeStrategy.first
8489
case PathList("org", "objectweb", "asm", xs @ _*) => MergeStrategy.first
85-
case PathList("org", "apache", "log4j", _*) => MergeStrategy.last // handled by log4j-over-slf4j
90+
case PathList("org", "apache", "log4j", _*) => MergeStrategy.last
91+
case PathList("org", "apache", "commons", _*) => MergeStrategy.last
8692
case PathList(ps @ _*) if ps.last endsWith ".html" => MergeStrategy.first
8793
case "application.conf" => MergeStrategy.concat
8894
case "module-info.class" => MergeStrategy.discard
95+
case PathList("com", "snowplowanalytics", "s3", "loader", "generated", _*) => MergeStrategy.last
8996
case x =>
9097
val oldStrategy = (assembly / assemblyMergeStrategy).value
9198
oldStrategy(x)
@@ -104,4 +111,11 @@ object BuildSettings {
104111
scalafmtConfig := file(".scalafmt.conf"),
105112
scalafmtOnCompile := false
106113
)
114+
115+
lazy val commonSettings = basicSettings ++ scalifySettings ++ sbtAssemblySettings ++ dockerSettings ++ addExampleConfToTestCp
116+
117+
lazy val lzoSettings = lzoDockerSettings ++ Seq(
118+
Compile / discoveredMainClasses := Seq(),
119+
Compile / mainClass := Some("com.snowplowanalytics.s3.loader.lzo.Main")
120+
)
107121
}

project/Dependencies.scala

+2-4
Original file line number | Diff line number | Diff line change
@@ -25,10 +25,9 @@ object Dependencies {
2525
val log4j = "2.14.0"
2626
val kinesisClient = "1.14.7"
2727
val kinesisConnector = "1.3.0"
28-
val hadoop = "2.7.7"
28+
val hadoop = "2.10.1"
2929
val elephantbird = "4.17"
3030
val hadoopLZO = "0.4.20"
31-
val apacheCommons = "3.2.1"
3231
val jackson = "2.12.6"
3332
val sentry = "1.7.30"
3433
val collections = "3.2.2" // Address vulnerability
@@ -77,11 +76,10 @@ object Dependencies {
7776
.exclude("org.mortbay.jetty", "jetty-util")
7877
.exclude("org.mortbay.jetty", "jetty")
7978
val collections = "commons-collections" % "commons-collections" % V.collections
80-
val jaxbApi = "javax.xml.bind" % "jaxb-api" % V.jaxbApi
79+
val jaxbApi = "javax.xml.bind" % "jaxb-api" % V.jaxbApi % Runtime
8180
val elephantbird = ("com.twitter.elephantbird" % "elephant-bird-core" % V.elephantbird)
8281
.exclude("com.hadoop.gplcompression", "hadoop-lzo")
8382
val hadoopLZO = "com.hadoop.gplcompression" % "hadoop-lzo" % V.hadoopLZO
84-
val apacheCommons = "org.apache.directory.studio" % "org.apache.commons.collections" % V.apacheCommons
8583
val sentry = "io.sentry" % "sentry" % V.sentry
8684
val protobuf = "com.google.protobuf" % "protobuf-java" % V.protobuf
8785

project/build.properties

+1-1
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
sbt.version=1.5.2
1+
sbt.version=1.5.5

0 commit comments

Comments
 (0)