diff --git a/node.go b/node.go index 9bc5961c..0a2dbf58 100644 --- a/node.go +++ b/node.go @@ -176,7 +176,7 @@ func (s *Node) Start() error { // Recover a node by forcing a new cluster configuration. // -// DEPRECATED: Use ReconfigureMembership instead, which does not require +// Deprecated: use ReconfigureMembershipExt instead, which does not require // instantiating a new Node object. func (s *Node) Recover(cluster []NodeInfo) error { return s.server.Recover(cluster) @@ -217,11 +217,11 @@ func GenerateID(address string) uint64 { return bindings.GenerateID(address) } -// ReconfigureMembership can be used to recover a cluster whose majority of -// nodes have died, and therefore has become unavailable. +// ReconfigureMembership forces a new cluster configuration. // -// It forces appending a new configuration to the raft log stored in the given -// directory, effectively replacing the current configuration. +// Deprecated: this function ignores the provided node roles and makes every +// node in the new configuration a voter. Use ReconfigureMembershipExt, which +// respects the provided roles. func ReconfigureMembership(dir string, cluster []NodeInfo) error { server, err := bindings.NewNode(context.Background(), 1, "1", dir) if err != nil { @@ -231,14 +231,29 @@ func ReconfigureMembership(dir string, cluster []NodeInfo) error { return server.Recover(cluster) } -// ReconfigureMembershipExt can be used to recover a cluster whose majority of -// nodes have died, and therefore has become unavailable. +// ReconfigureMembershipExt forces a new cluster configuration. // -// It forces appending a new configuration to the raft log stored in the given -// directory, effectively replacing the current configuration. -// In comparision with ReconfigureMembership, this function takes the node role -// into account and makes use of a dqlite API that supports extending the -// NodeInfo struct. +// This function is useful to revive a cluster that can't achieve quorum in its +// old configuration because some nodes can't be brought online. Forcing a new +// configuration is unsafe, and you should follow these steps to avoid data +// loss and inconsistency: +// +// 1. Make sure no dqlite node in the cluster is running. +// +// 2. Identify all dqlite nodes that have survived and that you want to be part +// of the recovered cluster. Call this the "new member list". +// +// 3. From the nodes in the new member list, find the one with the most +// up-to-date raft term and log. Call this the "template node". +// +// 4. Invoke ReconfigureMembershipExt exactly one time, on the template node. +// The arguments are the data directory of the template node and the new +// member list. +// +// 5. Copy the data directory of the template node to all other nodes in the +// new member list, replacing their previous data directories. +// +// 6. Restart all nodes in the new member list. func ReconfigureMembershipExt(dir string, cluster []NodeInfo) error { server, err := bindings.NewNode(context.Background(), 1, "1", dir) if err != nil {