Split out tests
derrickburns committed Jan 18, 2024
1 parent 4010048 commit 894d15c
Showing 1 changed file with 35 additions and 20 deletions.
55 changes: 35 additions & 20 deletions src/test/scala/com/massivedatascience/clusterer/KMeansSuite.scala
@@ -38,39 +38,65 @@ class KMeansSuite extends AnyFunSuite with LocalClusterSparkContext {
 
   import com.massivedatascience.clusterer.KMeansSelector._
 
-  test("coverage") {
+  test("use two runs") {
     val seed = 0
     val r = new Random(seed)
     val data = sc.parallelize[Vector](Array.fill(1000)(Vectors.dense(Array.fill(20)(r.nextDouble()))))
 
-    KMeans.train(data, k = 20, maxIterations = 10)
-
     KMeans.train(data, k = 20, maxIterations = 10, runs = 2)
+  }
 
-    KMeans.train(data, k = 20, maxIterations = 10, runs = 1, mode = KMeansSelector.RANDOM)
+  test("use change tracking clustering algorithm") {
+    val seed = 0
+    val r = new Random(seed)
+    val data = sc.parallelize[Vector](Array.fill(1000)(Vectors.dense(Array.fill(20)(r.nextDouble()))))
 
-    KMeans.train(data, k = 20, maxIterations = 10, runs = 1, distanceFunctionNames = Seq(BregmanPointOps.EUCLIDEAN))
-
     KMeans.train(data, k = 20, maxIterations = 10, runs = 1,
       distanceFunctionNames = Seq(BregmanPointOps.EUCLIDEAN),
       clustererName = MultiKMeansClusterer.CHANGE_TRACKING)
+  }
+  test("use reseed clustering algorithm") {
+    val seed = 0
+    val r = new Random(seed)
+    val data = sc.parallelize[Vector](Array.fill(1000)(Vectors.dense(Array.fill(20)(r.nextDouble()))))
 
     KMeans.train(data, k = 50, maxIterations = 10, runs = 1,
       distanceFunctionNames = Seq(BregmanPointOps.EUCLIDEAN),
       clustererName = MultiKMeansClusterer.RESEED)
+  }
 
+  test ("cluster time series data using Haar transform") {
+    val seed = 0
+    val r = new Random(seed)
+    val data = sc.parallelize[Vector](Array.fill(1000)(Vectors.dense(Array.fill(20)(r.nextDouble()))))
+
     KMeans.timeSeriesTrain(
       new RunConfig(20, 1, 0, 10),
       data.map(WeightedVector.apply),
       KMeansSelector(KMeansSelector.K_MEANS_PARALLEL),
       BregmanPointOps(BregmanPointOps.EUCLIDEAN),
       MultiKMeansClusterer(MultiKMeansClusterer.COLUMN_TRACKING),
       Embedding(HAAR_EMBEDDING))
+  }
 
-    KMeans.train(data, k = 20, maxIterations = 10, runs = 1, clustererName = MultiKMeansClusterer.MINI_BATCH_10)
-  }
+  test("use random initializer") {
+    val seed = 0
+    val r = new Random(seed)
+    val data = sc.parallelize[Vector](Array.fill(1000)(Vectors.dense(Array.fill(20)(r.nextDouble()))))
+
+    KMeans.train(data, k = 20, maxIterations = 10, runs = 1, mode = KMeansSelector.RANDOM)
+  }
+
+  test("use maximum of 10 iterations") {
+    val seed = 0
+    val r = new Random(seed)
+    val data = sc.parallelize[Vector](Array.fill(1000)(Vectors.dense(Array.fill(20)(r.nextDouble()))))
+
+    KMeans.train(data, k = 20, maxIterations = 10)
+  }
 
 
   test("sparse vector iterator") {
 
     import com.massivedatascience.linalg._
@@ -179,23 +205,12 @@ class KMeansSuite extends AnyFunSuite with LocalClusterSparkContext {
     assert(!iter.hasNext)
   }
 
-
-
-  test("iteratively train") {
+  test("use iterative training") {
     val seed = 0
     val r = new Random(seed)
 
     val data = sc.parallelize[Vector](Array.fill(1000)(Vectors.dense(Array.fill(20)(r.nextDouble()))))
 
-    /*
-      runConfig: RunConfig,
-      pointOps: Seq[BregmanPointOps],
-      dataSets: Seq[RDD[BregmanPoint]],
-      initializer: KMeansSelector,
-      clusterer: MultiKMeansClusterer)
-    */
-
     val ops = BregmanPointOps(BregmanPointOps.EUCLIDEAN)
     val cached = data.map(WeightedVector.apply).map(ops.toPoint).cache()
     KMeans.iterativelyTrain(
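The KMeans.iterativelyTrain call is cut off by the collapsed diff view. Going only by the parameter list in the comment this commit deletes (runConfig, pointOps, dataSets, initializer, clusterer), a complete invocation inside the test might look like the sketch below; the argument values and Seq(...) wrappers are assumptions drawn from elsewhere in this diff, not code from the repository:

    // Hypothetical completion of the truncated call, inferred from the
    // removed comment's parameter list; argument values are illustrative.
    KMeans.iterativelyTrain(
      new RunConfig(20, 1, 0, 10),                                 // runConfig: RunConfig
      Seq(ops),                                                    // pointOps: Seq[BregmanPointOps]
      Seq(cached),                                                 // dataSets: Seq[RDD[BregmanPoint]]
      KMeansSelector(KMeansSelector.K_MEANS_PARALLEL),             // initializer: KMeansSelector
      MultiKMeansClusterer(MultiKMeansClusterer.COLUMN_TRACKING))  // clusterer: MultiKMeansClusterer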

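Every test split out by this commit repeats the same three-line fixture: a Random seeded with 0 and an RDD of 1000 random 20-dimensional dense vectors. A small helper along these lines, a sketch rather than part of the commit (the name and defaults are illustrative), could remove the duplication while keeping the tests independent:

    // Hypothetical shared fixture for the split tests; builds the same
    // deterministic random input each test currently constructs inline.
    private def randomVectors(seed: Long = 0, count: Int = 1000, dim: Int = 20) = {
      val r = new Random(seed)
      sc.parallelize[Vector](Array.fill(count)(Vectors.dense(Array.fill(dim)(r.nextDouble()))))
    }

Each test body would then open with val data = randomVectors() instead of repeating the setup.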