// Build.scala
import sbt.Keys._
import sbt._
import sbtassembly.AssemblyKeys._
import sbtassembly.AssemblyPlugin.autoImport.{ShadeRule => _, assembly => _, assemblyExcludedJars => _, assemblyOption => _, assemblyShadeRules => _}
import sbtassembly._
import sbtsparkpackage.SparkPackagePlugin.autoImport._
import sbtrelease.ReleasePlugin.autoImport._
import ReleaseTransformations._
import sbtprotobuf.ProtobufPlugin
import sbtprotobuf.ProtobufPlugin.autoImport._

object Shading extends Build {
  import Dependencies._

  lazy val commonSettings = Seq(
    name := "tensorframes",
    scalaVersion := sys.props.getOrElse("scala.version", "2.11.8"),
    organization := "databricks",
    sparkVersion := targetSparkVersion,
    licenses := Seq("Apache-2.0" -> url("http://opensource.org/licenses/Apache-2.0")),
    // System configuration
    parallelExecution := false,
    javaOptions in run += "-Xmx6G",
    // Include all the Python files in the final binary
    unmanagedResourceDirectories in Compile += {
      baseDirectory.value / "src/main/python/"
    },
    version in protobufGenerate := "3.6.1",
    // Spark packages does not like this part: skip tests when building the assembly.
    test in assembly := {},
    // We only use sbt-release to update version numbers for now.
    releaseProcess := Seq[ReleaseStep](
      inquireVersions,
      setReleaseVersion,
      commitReleaseVersion,
      tagRelease,
      setNextVersion,
      commitNextVersion
    ),
    resolvers += Resolver.bintrayRepo("meng", "spark-3.0-snapshots")
  )
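
  // For reference, a sketch (not part of the original build) of what the default
  // sbt-release pipeline runs beyond the version-bump steps kept above; all step
  // names come from sbtrelease.ReleaseTransformations:
  //
  //   releaseProcess := Seq[ReleaseStep](
  //     checkSnapshotDependencies, // fail on -SNAPSHOT dependencies
  //     inquireVersions,
  //     runClean,
  //     runTest,                   // run the full test suite
  //     setReleaseVersion,
  //     commitReleaseVersion,
  //     tagRelease,
  //     publishArtifacts,          // publish to the configured repository
  //     setNextVersion,
  //     commitNextVersion,
  //     pushChanges
  //   )
  //
  // The trimmed process above deliberately drops the test, publish, and push steps.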

  lazy val sparkDependencies = Seq(
    // Spark dependencies
    "org.apache.spark" %% "spark-core" % targetSparkVersion,
    "org.apache.spark" %% "spark-sql" % targetSparkVersion
  )

  // The dependencies that are platform-specific.
  lazy val allPlatformDependencies = Seq(
  )

  // The dependencies for Linux only.
  // For cloud environments, it is easier to publish a smaller jar, due to
  // limitations of spark-packages.
  lazy val linuxPlatformDependencies = Seq(
  )

  lazy val nonShadedDependencies = Seq(
    // Normal dependencies
    "org.apache.commons" % "commons-proxy" % "1.0",
    "org.apache.commons" % "commons-lang3" % "3.4",
    "com.typesafe.scala-logging" %% "scala-logging-api" % "2.1.2",
    "com.typesafe.scala-logging" %% "scala-logging-slf4j" % "2.1.2",
    // TensorFlow dependencies
    "org.tensorflow" % "tensorflow" % targetTensorFlowVersion
  )

  lazy val testDependencies = Seq(
    // Test dependencies
    "org.scalatest" %% "scalatest" % "3.0.0" % "test",
    "org.scalactic" %% "scalactic" % "3.0.0" % "test"
  )

  lazy val shaded = Project("shaded", file(".")).settings(
    target := target.value / "shaded",
    libraryDependencies ++= nonShadedDependencies.map(_ % "provided"),
    libraryDependencies ++= sparkDependencies.map(_ % "provided"),
    libraryDependencies ++= testDependencies,
    libraryDependencies ++= allPlatformDependencies,
    assemblyShadeRules in assembly := Seq(
      ShadeRule.rename("com.google.protobuf.**" -> "org.tensorframes.protobuf3shade.@1").inAll,
      ShadeRule.rename("google.protobuf.**" -> "org.tensorframes.google.protobuf3shade.@1").inAll
    ),
    assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
  ).settings(commonSettings: _*)
    .enablePlugins(ProtobufPlugin)
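
  // A note on the rename rules above (illustration, not part of the original
  // build): at assembly time, sbt-assembly rewrites the bytecode of every
  // matching class and of every class that references it, so a protobuf 3
  // class such as
  //   com.google.protobuf.Descriptors
  // ends up in the fat jar as
  //   org.tensorframes.protobuf3shade.Descriptors
  // The `@1` placeholder expands to whatever the `**` wildcard matched. This
  // relocation is what keeps our protobuf 3 classes from clashing with the
  // protobuf 2.x classes that Spark puts on the classpath.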

  // The artifact that is used for spark packages:
  //  - includes the binary libraries, shaded protobuf
  //  - does not include the other dependencies
  lazy val distribute = Project("distribution", file(".")).settings(
    target := target.value / "distribution",
    libraryDependencies := nonShadedDependencies,
    libraryDependencies ++= sparkDependencies.map(_ % "provided"),
    libraryDependencies ++= testDependencies,
    spName := "databricks/tensorframes",
    spShortDescription := "TensorFlow wrapper for DataFrames on Apache Spark",
    spDescription := {
      """TensorFrames (TensorFlow on Spark DataFrames) lets you manipulate Spark's DataFrames with
        | TensorFlow programs.
        |
        |This package provides a small runtime to express and run TensorFlow computation graphs.
        |TensorFlow programs can be interpreted from:
        | - the official Python API
        | - the semi-official protocol buffer graph description format
        | - the Scala DSL embedded with TensorFrames (experimental)
        |
        |For more information, visit the TensorFrames user guide:
        |
      """.stripMargin
    },
    spAppendScalaVersion := true,
    spHomepage := "https://github.com/databricks/tensorframes",
    spShade := true,
    assembly in spPackage := (assembly in shaded).value,
    credentials += Credentials(Path.userHome / ".ssh" / "credentials_tensorframes.sbt.txt")
  ).settings(commonSettings: _*)
    .enablePlugins(ProtobufPlugin)
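
  // Usage sketch (an assumption about the intended workflow, based on the
  // standard sbt-spark-package tasks; not spelled out in this file):
  //   sbt "project distribution" spDist     // build the spark-packages zip
  //   sbt "project distribution" spPublish  // publish, using the credentials above
  // Because spShade := true and `assembly in spPackage` is wired to the shaded
  // project, the published jar is the shaded assembly built above.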

  // The Java testing artifact: do not shade or embed anything.
  lazy val testing = Project("tfs_testing", file(".")).settings(
    target := target.value / "testing",
    libraryDependencies ++= sparkDependencies.map(_ % "provided"),
    libraryDependencies ++= nonShadedDependencies,
    libraryDependencies ++= testDependencies,
    libraryDependencies ++= allPlatformDependencies,
    // Do not attempt to run tests when building the assembly.
    test in assembly := {},
    // Spark has a dependency on protobuf 2.x, which conflicts with protobuf 3.x.
    // Our own protobuf dependency needs to be shaded.
    assemblyShadeRules in assembly := Seq(
      ShadeRule.rename("com.google.protobuf.**" -> "org.tensorframes.protobuf3shade.@1").inAll
    ),
    assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false)
  ).settings(commonSettings: _*)
    .enablePlugins(ProtobufPlugin)
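
  // Running the builds (an assumption inferred from the project names above,
  // not stated in this file):
  //   sbt tfs_testing/test   // run the suite on the plain, unshaded classpath
  //   sbt shaded/assembly    // produce the fat jar with protobuf relocated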
}