Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 10 additions & 14 deletions go/vt/vtorc/logic/topology_recovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,6 @@ const (
)

var (
actionableRecoveriesNames = []string{
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

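`stats.NewCountersWithMultiLabels` does not accept a variadic `...string` of label values to pre-initialize the counters to zero (unlike `stats.NewCountersWithSingleLabel`), so `actionableRecoveriesNames` becomes unused.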

RecoverDeadPrimaryRecoveryName,
RecoverPrimaryHasPrimaryRecoveryName,
ElectNewPrimaryRecoveryName,
FixPrimaryRecoveryName,
FixReplicaRecoveryName,
}

countPendingRecoveries = stats.NewGauge("PendingRecoveries", "Count of the number of pending recoveries")

// detectedProblems is used to track the number of detected problems.
Expand All @@ -75,14 +67,17 @@ var (
// shardsLockCounter is a count of in-flight shard locks. Use atomics to read/update.
shardsLockCounter int64

// recoveriesCounterLabels are labels for grouping the counter based stats for recoveries.
recoveriesCounterLabels = []string{"RecoveryType", "Keyspace", "Shard"}

// recoveriesCounter counts the number of recoveries that VTOrc has performed
recoveriesCounter = stats.NewCountersWithSingleLabel("RecoveriesCount", "Count of the different recoveries performed", "RecoveryType", actionableRecoveriesNames...)
recoveriesCounter = stats.NewCountersWithMultiLabels("RecoveriesCount", "Count of the different recoveries performed", recoveriesCounterLabels)

// recoveriesSuccessfulCounter counts the number of successful recoveries that VTOrc has performed
recoveriesSuccessfulCounter = stats.NewCountersWithSingleLabel("SuccessfulRecoveries", "Count of the different successful recoveries performed", "RecoveryType", actionableRecoveriesNames...)
recoveriesSuccessfulCounter = stats.NewCountersWithMultiLabels("SuccessfulRecoveries", "Count of the different successful recoveries performed", recoveriesCounterLabels)

// recoveriesFailureCounter counts the number of failed recoveries that VTOrc has performed
recoveriesFailureCounter = stats.NewCountersWithSingleLabel("FailedRecoveries", "Count of the different failed recoveries performed", "RecoveryType", actionableRecoveriesNames...)
recoveriesFailureCounter = stats.NewCountersWithMultiLabels("FailedRecoveries", "Count of the different failed recoveries performed", recoveriesCounterLabels)

// shardLockTimings measures the timing of LockShard operations.
shardLockTimingsActions = []string{"Lock", "Unlock"}
Expand Down Expand Up @@ -637,13 +632,14 @@ func executeCheckAndRecoverFunction(analysisEntry *inst.ReplicationAnalysis) (er
return err
}
recoveryName := getRecoverFunctionName(checkAndRecoverFunctionCode)
recoveriesCounter.Add(recoveryName, 1)
recoveryLabels := []string{recoveryName, analysisEntry.AnalyzedKeyspace, analysisEntry.AnalyzedShard}
recoveriesCounter.Add(recoveryLabels, 1)
if err != nil {
logger.Errorf("Failed to recover: %+v", err)
recoveriesFailureCounter.Add(recoveryName, 1)
recoveriesFailureCounter.Add(recoveryLabels, 1)
} else {
logger.Info("Recovery succeeded")
recoveriesSuccessfulCounter.Add(recoveryName, 1)
recoveriesSuccessfulCounter.Add(recoveryLabels, 1)
}
if topologyRecovery == nil {
logger.Error("Topology recovery is nil - recovery might have failed")
Expand Down
Loading