Skip to content

Commit 3dd1516

Browse files
tabletmanager: remove v22 backwards-compatibility for vterrors migration (#18986)
Signed-off-by: Tim Vaillancourt <[email protected]>
1 parent 488ef3d commit 3dd1516

File tree

5 files changed

+62
-62
lines changed

5 files changed

+62
-62
lines changed

go/vt/vttablet/tabletserver/throttle/base/metric_result.go

Lines changed: 7 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,6 @@ package base
1818

1919
import (
2020
"errors"
21-
"net"
22-
23-
"google.golang.org/grpc/codes"
24-
"google.golang.org/grpc/status"
2521

2622
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
2723
"vitess.io/vitess/go/vt/vterrors"
@@ -59,30 +55,17 @@ var ErrAppDenied = errors.New("app denied")
5955
// ErrInvalidCheckType is an internal error indicating an unknown check type
6056
var ErrInvalidCheckType = errors.New("unknown throttler check type")
6157

62-
// IsDialTCPError sees if the given error indicates a TCP issue
63-
func IsDialTCPError(err error) bool {
58+
// IsTabletRPCError sees if the given error indicates an issue performing an RPC call
59+
// to the tabletmanager service of a tablet. This is used to parse errors returned by
60+
// the CheckThrottler RPC of grpctmclient.
61+
func IsTabletRPCError(err error) bool {
6462
if err == nil {
6563
return false
6664
}
6765

68-
// match vterror/vtrpc-style errors (v23+).
69-
// v22 (and below) tablets will return vtrpcpb.Code_UNKNOWN
70-
switch vterrors.Code(err) {
71-
case vtrpcpb.Code_UNAVAILABLE, vtrpcpb.Code_DEADLINE_EXCEEDED:
72-
return true
73-
}
74-
75-
// match google grpc errors (v22 and below)
76-
// TODO: remove after v24+
77-
if s, ok := status.FromError(err); ok {
78-
return s.Code() == codes.Unavailable || s.Code() == codes.DeadlineExceeded
79-
}
80-
81-
switch err := err.(type) {
82-
case *net.OpError:
83-
return err.Op == "dial" && err.Net == "tcp"
84-
}
85-
return false
66+
// The tmclient returns vterrors-style errors. Any
67+
// error code other than "OK" indicates a problem.
68+
return vterrors.Code(err) != vtrpcpb.Code_OK
8669
}
8770

8871
type noHostsMetricResult struct{}

go/vt/vttablet/tabletserver/throttle/base/tablet_results.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ func AggregateTabletMetricResults(
3333
metricName MetricName,
3434
tabletResultsMap TabletResultMap,
3535
ignoreHostsCount int,
36-
IgnoreDialTCPErrors bool,
36+
ignoreTabletRPCErrors bool,
3737
ignoreHostsThreshold float64,
3838
) (worstMetric MetricResult) {
3939
// probes is known not to change. It can be *replaced*, but not changed.
@@ -49,7 +49,7 @@ func AggregateTabletMetricResults(
4949
}
5050
value, err := tabletMetricResult.Get()
5151
if err != nil {
52-
if IgnoreDialTCPErrors && IsDialTCPError(err) {
52+
if ignoreTabletRPCErrors && IsTabletRPCError(err) {
5353
continue
5454
}
5555
if ignoreHostsCount > 0 {

go/vt/vttablet/tabletserver/throttle/config/mysql_config.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,10 @@ package config
4747

4848
// MySQLConfigurationSettings has the general configuration for all MySQL clusters
4949
type MySQLConfigurationSettings struct {
50-
CacheMillis int // optional, if defined then probe result will be cached, and future probes may use cached value
51-
Port int // Specify if different than 3306; applies to all clusters
52-
IgnoreDialTCPErrors bool // Skip hosts where a metric cannot be retrieved due to TCP dial errors
53-
IgnoreHostsCount int // Number of hosts that can be skipped/ignored even on error or on exceeding thresholds
54-
IgnoreHosts []string // If non empty, substrings to indicate hosts to be ignored/skipped
50+
CacheMillis int // optional, if defined then probe result will be cached, and future probes may use cached value
51+
Port int // Specify if different than 3306; applies to all clusters
52+
IgnoreTabletRPCErrors bool // Skip hosts where a metric cannot be retrieved due to tablet RPC errors
53+
IgnoreDialTCPErrors bool // TODO: deprecate/replace with IgnoreTabletRPCErrors.
54+
IgnoreHostsCount int // Number of hosts that can be skipped/ignored even on error or on exceeding thresholds
55+
IgnoreHosts []string // If non empty, substrings to indicate hosts to be ignored/skipped
5556
}

go/vt/vttablet/tabletserver/throttle/throttler.go

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ import (
7777

7878
tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata"
7979
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
80-
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
8180
)
8281

8382
const (
@@ -332,7 +331,7 @@ func (throttler *Throttler) initConfig() {
332331

333332
throttler.configSettings = &config.ConfigurationSettings{
334333
MySQLStore: config.MySQLConfigurationSettings{
335-
IgnoreDialTCPErrors: true,
334+
IgnoreTabletRPCErrors: true,
336335
},
337336
}
338337
}
@@ -904,15 +903,10 @@ func (throttler *Throttler) generateTabletProbeFunction(scope base.Scope, probe
904903
metrics := make(base.ThrottleMetrics)
905904

906905
req := &tabletmanagerdatapb.CheckThrottlerRequest{} // We leave AppName empty; it will default to VitessName anyway, and we can save some proto space
907-
resp, gRPCErr := tmClient.CheckThrottler(ctx, probe.Tablet, req)
908-
if gRPCErr != nil {
909-
if vtErrCode := vterrors.Code(gRPCErr); vtErrCode != vtrpcpb.Code_UNKNOWN {
910-
gRPCErr = vterrors.Errorf(vtErrCode, "gRPC error accessing tablet %v. Err=%s", probe.Alias, gRPCErr.Error())
911-
} else {
912-
// TODO: remove after v24+ when all errors are vterrors/vtrpc-based
913-
gRPCErr = fmt.Errorf("gRPC error accessing tablet %v. Err=%w", probe.Alias, gRPCErr)
914-
}
915-
return metricsWithError(gRPCErr)
906+
resp, err := tmClient.CheckThrottler(ctx, probe.Tablet, req)
907+
if err != nil {
908+
err = vterrors.Wrapf(err, "gRPC error accessing tablet %v. Err=%s", probe.Alias, err.Error())
909+
return metricsWithError(err)
916910
}
917911
throttleMetric.Value = resp.Value
918912
if resp.ResponseCode == tabletmanagerdatapb.CheckThrottlerResponseCode_INTERNAL_ERROR {
@@ -1173,9 +1167,9 @@ func (throttler *Throttler) aggregateMetrics() error {
11731167
aggregateTabletsMetrics := func(scope base.Scope, metricName base.MetricName, tabletResultsMap base.TabletResultMap) {
11741168
ignoreHostsCount := throttler.inventory.IgnoreHostsCount
11751169
ignoreHostsThreshold := throttler.inventory.IgnoreHostsThreshold
1176-
ignoreDialTCPErrors := throttler.configSettings.MySQLStore.IgnoreDialTCPErrors
1170+
ignoreTabletRPCErrors := (throttler.configSettings.MySQLStore.IgnoreTabletRPCErrors || throttler.configSettings.MySQLStore.IgnoreDialTCPErrors)
11771171

1178-
aggregatedMetric := base.AggregateTabletMetricResults(metricName, tabletResultsMap, ignoreHostsCount, ignoreDialTCPErrors, ignoreHostsThreshold)
1172+
aggregatedMetric := base.AggregateTabletMetricResults(metricName, tabletResultsMap, ignoreHostsCount, ignoreTabletRPCErrors, ignoreHostsThreshold)
11791173
aggregatedMetricName := metricName.AggregatedName(scope)
11801174
throttler.aggregatedMetrics.Set(aggregatedMetricName, aggregatedMetric, cache.DefaultExpiration)
11811175
if metricName == metricNameUsedAsDefault {

go/vt/vttablet/tabletserver/throttle/throttler_test.go

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ package throttle
1818

1919
import (
2020
"context"
21-
"errors"
2221
"fmt"
2322
"math"
2423
"sync"
@@ -31,14 +30,10 @@ import (
3130
"github.com/stretchr/testify/require"
3231
"golang.org/x/exp/maps"
3332

34-
"google.golang.org/grpc"
35-
"google.golang.org/grpc/credentials/insecure"
36-
3733
"vitess.io/vitess/go/protoutil"
38-
39-
"vitess.io/vitess/go/vt/grpcclient"
4034
"vitess.io/vitess/go/vt/topo"
4135
"vitess.io/vitess/go/vt/vtenv"
36+
"vitess.io/vitess/go/vt/vterrors"
4237
"vitess.io/vitess/go/vt/vttablet/grpctmclient"
4338
"vitess.io/vitess/go/vt/vttablet/tabletserver/connpool"
4439
"vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv"
@@ -49,6 +44,7 @@ import (
4944

5045
tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata"
5146
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
47+
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
5248
)
5349

5450
var (
@@ -844,19 +840,45 @@ func TestApplyThrottlerConfigAppCheckedMetrics(t *testing.T) {
844840
})
845841
}
846842

847-
func TestIsDialTCPError(t *testing.T) {
848-
// Verify that IsDialTCPError actually recognizes grpc dial errors
849-
cc, err := grpcclient.DialContext(t.Context(), ":0", true, grpc.WithTransportCredentials(insecure.NewCredentials()))
850-
require.NoError(t, err)
851-
defer cc.Close()
843+
func TestIsTabletRPCError(t *testing.T) {
844+
c := grpctmclient.NewClient()
845+
846+
// simulate an RPC cancellation using .CheckThrottler().
847+
t.Run("CANCELLED", func(t *testing.T) {
848+
ctx, cancel := context.WithCancel(t.Context())
849+
cancel() // cancel before check
850+
851+
_, err := c.CheckThrottler(ctx, &topodatapb.Tablet{
852+
Hostname: "this.should.fail",
853+
PortMap: map[string]int32{
854+
"grpc": 12345,
855+
},
856+
}, &tabletmanagerdatapb.CheckThrottlerRequest{})
857+
require.Equal(t, vtrpcpb.Code_CANCELED, vterrors.Code(err))
858+
require.True(t, base.IsTabletRPCError(err))
859+
})
852860

853-
err = cc.Invoke(context.Background(), "/Fail", nil, nil)
861+
// simulate an RPC failure (dial error) using .CheckThrottler() to a host we cannot resolve.
862+
t.Run("DEADLINE_EXCEEDED", func(t *testing.T) {
863+
ctx, cancel := context.WithTimeout(t.Context(), time.Second)
864+
defer cancel()
854865

855-
require.True(t, base.IsDialTCPError(err))
856-
require.True(t, base.IsDialTCPError(fmt.Errorf("wrapped: %w", err)))
866+
_, err := c.CheckThrottler(ctx, &topodatapb.Tablet{
867+
Hostname: "this.should.fail",
868+
PortMap: map[string]int32{
869+
"grpc": 12345,
870+
},
871+
}, &tabletmanagerdatapb.CheckThrottlerRequest{})
872+
require.Equal(t, vtrpcpb.Code_DEADLINE_EXCEEDED, vterrors.Code(err))
873+
require.True(t, base.IsTabletRPCError(err))
874+
require.ErrorContains(t, err, "rpc error: code = DeadlineExceeded desc = latest balancer error: connection error")
875+
})
857876

858-
nonDialErr := errors.New("rpc error: code = NotFound desc = method not found")
859-
require.False(t, base.IsDialTCPError(nonDialErr))
877+
// simulate hypothetical NOT_FOUND RPC failure.
878+
t.Run("NOT_FOUND", func(t *testing.T) {
879+
nonDialErr := vterrors.New(vtrpcpb.Code_NOT_FOUND, "rpc error: code = NotFound desc = method not found")
880+
require.True(t, base.IsTabletRPCError(nonDialErr))
881+
})
860882
}
861883

862884
func TestProbeWithUnavailableHost(t *testing.T) {
@@ -893,7 +915,7 @@ func TestProbeWithUnavailableHost(t *testing.T) {
893915
probeFunc := throttler.generateTabletProbeFunction(base.ShardScope, probe)
894916

895917
metrics := probeFunc(t.Context(), tmClient)
896-
require.True(t, base.IsDialTCPError(metrics["custom"].Err))
918+
require.True(t, base.IsTabletRPCError(metrics["custom"].Err))
897919

898920
tabletResultsMap := base.TabletResultMap{
899921
"cell1-100": base.MetricResultMap{
@@ -939,7 +961,7 @@ func TestProbeWithEmptyHostAndPort(t *testing.T) {
939961
probeFunc := throttler.generateTabletProbeFunction(base.ShardScope, probe)
940962

941963
metrics := probeFunc(t.Context(), tmClient)
942-
require.True(t, base.IsDialTCPError(metrics["custom"].Err))
964+
require.True(t, base.IsTabletRPCError(metrics["custom"].Err))
943965

944966
tabletResultsMap := base.TabletResultMap{
945967
"cell1-100": base.MetricResultMap{

0 commit comments

Comments
 (0)