Skip to content

Commit

Permalink
HTTP 500 Errors Based on ZK Storage (#197)
Browse files Browse the repository at this point in the history
* error handling around loading data. Resolves the issue where there is a dangling path with no data or with corrupt data

* update based on PR feedback

* specific action for failure to read zk state

* the process must end if we have a protocol buffer exception

* updates based on feedback on cleaning up code
  • Loading branch information
kensipe authored Apr 4, 2018
1 parent 0659f67 commit f056185
Showing 1 changed file with 21 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@ package dcos.metronome
package repository

import akka.actor.{ Actor, ActorLogging, Stash }
import com.google.protobuf.InvalidProtocolBufferException
import mesosphere.marathon.StoreCommandFailedException
import org.apache.zookeeper.KeeperException.NoNodeException

import scala.concurrent.Future
import scala.util.control.NonFatal
import scala.util.{ Failure, Success }

trait LoadContentOnStartup[Id, Model] extends Actor with Stash with ActorLogging {
Expand Down Expand Up @@ -31,11 +35,7 @@ trait LoadContentOnStartup[Id, Model] extends Actor with Stash with ActorLogging

def loadAll(): Unit = {
val loadAllFuture = repo.ids().flatMap { ids =>
ids.foldLeft(Future.successful(List.empty[Model])) {
case (resultsFuture, id) => resultsFuture.flatMap { res =>
repo.get(id).map(_.map(_ :: res).getOrElse(res))
}
}
Future.sequence(ids.map(id => getModel(id))).map(_.flatten.toList)
}
val me = self
loadAllFuture.onComplete {
Expand All @@ -45,6 +45,22 @@ trait LoadContentOnStartup[Id, Model] extends Actor with Stash with ActorLogging
throw ex
}
}

private def getModel(id: Id): Future[Option[Model]] = {
repo.get(id).recoverWith {
case ex: StoreCommandFailedException =>
ex.getCause match {
case cause: NoNodeException =>
log.error(s"ID $id or job-specs znode missing. Zk will need to be manually repaired. Exception message: ${cause.getMessage}")
Future.successful(None)
case NonFatal(cause) =>
log.error(s"Unexpected exception occurred in reading zk at startup. Exception message: ${cause.getMessage}")
// We need crash strategy similar to marathon, for now we can NOT continue with such a zk failure.
System.exit(-1)
Future.failed(cause)
}
}
}
}

object LoadContentOnStartup {
Expand Down

0 comments on commit f056185

Please sign in to comment.