Skip to content

Commit

Permalink
Publish assembly jar and formatting (#13)
Browse files Browse the repository at this point in the history
- Add publishing of the assembly jar
- Shade our version of scopt to avoid potential classpath issues
- Run scalafmt formatter
  • Loading branch information
jamesfielder authored Jan 16, 2022
1 parent e147862 commit 5920e40
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 18 deletions.
13 changes: 11 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,18 @@ lazy val sparkdistcp = (project in file("."))
libraryDependencies += scopt,
libraryDependencies ++= spark(scalaVersion.value),
libraryDependencies += "org.scala-lang.modules" %% "scala-collection-compat" % Dependencies.collectionCompat % Provided,
assemblyPackageScala /assembleArtifact := false,
assemblyPackageScala / assembleArtifact := false,
assembly / assemblyOption ~= {
_.withIncludeScala(false)
},
assembly / Keys.test := {},
assembly / artifact := {
val art = (assembly / artifact).value
art.withClassifier(Some("assembly"))
},
ThisBuild / assemblyShadeRules := Seq(
ShadeRule.rename("scopt.**" -> "internal.spark.distcp.scopt.@1").inAll
),
licenses := Seq(
"APL2" -> url("http://www.apache.org/licenses/LICENSE-2.0.txt")
),
Expand Down Expand Up @@ -85,5 +93,6 @@ lazy val sparkdistcp = (project in file("."))
publishConfiguration := publishConfiguration.value
.withOverwrite(isSnapshot.value),
publishLocalConfiguration := publishLocalConfiguration.value
.withOverwrite(isSnapshot.value)
.withOverwrite(isSnapshot.value),
addArtifact(assembly / artifact, assembly)
)
4 changes: 3 additions & 1 deletion src/main/scala/com/coxautodata/SparkDistCP.scala
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ object SparkDistCP extends Logging {

val config = OptionsParsing.parse(args)
val sparkSession = SparkSession.builder().getOrCreate()
val options = config.options.withFiltersFromFile(sparkSession.sparkContext.hadoopConfiguration)
val options = config.options.withFiltersFromFile(
sparkSession.sparkContext.hadoopConfiguration
)
val (src, dest) = config.sourceAndDestPaths
run(sparkSession, src, dest, options)

Expand Down
33 changes: 18 additions & 15 deletions src/main/scala/com/coxautodata/SparkDistCPOptions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -52,21 +52,24 @@ case class SparkDistCPOptions(
hadoopConfiguration: Configuration
): SparkDistCPOptions = {

val fn = filters.map(f => {
try {
val path = new Path(f)
val fs = path.getFileSystem(hadoopConfiguration)

val in = fs.open(path)

val r = scala.io.Source.fromInputStream(in).getLines().map(_.r).toList

in.close()
r
} catch {
case e:IOException => throw new RuntimeException("Invalid filter file "+f, e)
}
}).getOrElse(List.empty)
val fn = filters
.map(f => {
try {
val path = new Path(f)
val fs = path.getFileSystem(hadoopConfiguration)

val in = fs.open(path)

val r = scala.io.Source.fromInputStream(in).getLines().map(_.r).toList

in.close()
r
} catch {
case e: IOException =>
throw new RuntimeException("Invalid filter file " + f, e)
}
})
.getOrElse(List.empty)

this.copy(filterNot = fn)

Expand Down

0 comments on commit 5920e40

Please sign in to comment.