Skip to content
10 changes: 8 additions & 2 deletions go/cmd/vtctldclient/command/shards.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ that shard.`,
// GenerateShardRanges outputs a set of shard ranges assuming a (mostly)
// equal distribution of N shards.
GenerateShardRanges = &cobra.Command{
Use: "GenerateShardRanges <num_shards>",
Use: "GenerateShardRanges <num_shards> [--chars]",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think --chars makes sense. Other things I thought about:

  • --num-digits
  • --digits
  • --hex-char-count
  • --char-count
  • --range-digits

Let me know if you or anyone thinks any of those are better.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Claude likes --digits. It also suggests --width.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about --hex-format-length or --format-length?

Short: "Print a set of shard ranges assuming a keyspace with N shards.",
DisableFlagsInUseLine: true,
Args: cobra.ExactArgs(1),
Expand All @@ -68,7 +68,7 @@ that shard.`,

cli.FinishedParsing(cmd)

shards, err := key.GenerateShardRanges(n)
shards, err := key.GenerateShardRanges(n, generateShardRangesOptions.Chars)
if err != nil {
return err
}
Expand Down Expand Up @@ -210,6 +210,10 @@ var createShardOptions = struct {
IncludeParent bool
}{}

// generateShardRangesOptions holds the flag values for the GenerateShardRanges
// command.
var generateShardRangesOptions = struct {
// Chars is the number of hex characters used for each shard range endpoint,
// bound to the "chars" flag. It is passed through to key.GenerateShardRanges;
// the flag's default of 0 leaves the width to be derived from the shard count.
Chars int
}{}

func commandCreateShard(cmd *cobra.Command, args []string) error {
keyspace, shard, err := topoproto.ParseKeyspaceShard(cmd.Flags().Arg(0))
if err != nil {
Expand Down Expand Up @@ -663,6 +667,8 @@ func init() {

Root.AddCommand(GetShard)
Root.AddCommand(GetShardReplication)

GenerateShardRanges.Flags().IntVar(&generateShardRangesOptions.Chars, "chars", 0, "The number of hex characters to use for the shard range endpoints. If not set, it will be automatically computed based on the number of requested shards.")
Root.AddCommand(GenerateShardRanges)

RemoveShardCell.Flags().BoolVarP(&removeShardCellOptions.Force, "force", "f", false, "Proceed even if the cell's topology server cannot be reached. The assumption is that you turned down the entire cell, and just need to update the global topo data.")
Expand Down
67 changes: 26 additions & 41 deletions go/vt/key/key.go
Original file line number Diff line number Diff line change
Expand Up @@ -379,23 +379,31 @@ func EvenShardsKeyRange(i, n int) (*topodatapb.KeyRange, error) {
}

// GenerateShardRanges returns shard ranges assuming a keyspace with N shards.
func GenerateShardRanges(shards int) ([]string, error) {
var format string
var maxShards int

func GenerateShardRanges(shards int, hexChars int) ([]string, error) {
switch {
case shards <= 0:
return nil, errors.New("shards must be greater than zero")
case shards == 1:
return []string{"-"}, nil
case shards <= 256:
format = "%02x"
maxShards = 256
if hexChars == 0 {
hexChars = 2
}
case shards <= 65536:
format = "%04x"
maxShards = 65536
if hexChars == 0 {
hexChars = 4
}
default:
return nil, errors.New("this function does not support more than 65336 shards in a single keyspace")
}

maxShards := math.Pow(16, float64(hexChars))
if shards > int(maxShards) {
return nil, fmt.Errorf("the given number of shards (%d) is too high for the given number of characters to use (%d)", shards, hexChars)
}

format := fmt.Sprintf("%%0%dx", hexChars)

rangeFormatter := func(start, end int) string {
var (
startKid string
Expand All @@ -406,47 +414,24 @@ func GenerateShardRanges(shards int) ([]string, error) {
startKid = fmt.Sprintf(format, start)
}

if end != maxShards {
if end != int(maxShards) {
endKid = fmt.Sprintf(format, end)
}

return fmt.Sprintf("%s-%s", startKid, endKid)
}

start := 0
end := 0

// If shards does not divide evenly into maxShards, then there is some lossiness,
// where each shard is smaller than it should technically be (if, for example, size == 25.6).
// If we choose to keep everything in ints, then we have two choices:
// - Have every shard in #numshards be a uniform size, tack on an additional shard
// at the end of the range to account for the loss. This is bad because if you ask for
// 7 shards, you'll actually get 7 uniform shards with 1 small shard, for 8 total shards.
// It's also bad because one shard will have much different data distribution than the rest.
// - Expand the final shard to include whatever is left in the keyrange. This will give the
// correct number of shards, which is good, but depending on how lossy each individual shard is,
// you could end with that final shard being significantly larger than the rest of the shards,
// so this doesn't solve the data distribution problem.
//
// By tracking the "real" end (both in the real number sense, and in the truthfulness of the value sense),
// we can re-truncate the integer end on each iteration, which spreads the lossiness more
// evenly across the shards.
//
// This implementation has no impact on shard numbers that are powers of 2, even at large numbers,
// which you can see in the tests.
size := float64(maxShards) / float64(shards)
realEnd := float64(0)
shardRanges := make([]string, 0, shards)

for i := 1; i < shards; i++ {
realEnd = float64(i) * size

end = int(realEnd)
shardRanges = append(shardRanges, rangeFormatter(start, end))
start = end
boundaries := make([]int, 0, shards+1)
for i := 0; i < shards; i++ {
boundaries = append(boundaries, int(float64(i)*maxShards/float64(shards)))
}

shardRanges = append(shardRanges, rangeFormatter(start, maxShards))
shardRanges := make([]string, 0, shards)
shardRanges = append(shardRanges, rangeFormatter(0, boundaries[1])) // first shard
for i := 1; i < shards-1; i++ {
shardRanges = append(shardRanges, rangeFormatter(boundaries[i], boundaries[i+1]))
}
shardRanges = append(shardRanges, rangeFormatter(boundaries[shards-1], int(maxShards))) // last shard

return shardRanges, nil
}
56 changes: 49 additions & 7 deletions go/vt/key/key_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1548,11 +1548,17 @@ func TestGenerateShardRanges(t *testing.T) {
[]string{"-01", "01-02", "02-03", "03-04", "04-05", "05-06", "06-07", "07-08", "08-09", "09-0a", "0a-0b", "0b-0c", "0c-0d", "0d-0e", "0e-0f", "0f-10", "10-11", "11-12", "12-13", "13-14", "14-15", "15-16", "16-17", "17-18", "18-19", "19-1a", "1a-1b", "1b-1c", "1c-1d", "1d-1e", "1e-1f", "1f-20", "20-21", "21-22", "22-23", "23-24", "24-25", "25-26", "26-27", "27-28", "28-29", "29-2a", "2a-2b", "2b-2c", "2c-2d", "2d-2e", "2e-2f", "2f-30", "30-31", "31-32", "32-33", "33-34", "34-35", "35-36", "36-37", "37-38", "38-39", "39-3a", "3a-3b", "3b-3c", "3c-3d", "3d-3e", "3e-3f", "3f-40", "40-41", "41-42", "42-43", "43-44", "44-45", "45-46", "46-47", "47-48", "48-49", "49-4a", "4a-4b", "4b-4c", "4c-4d", "4d-4e", "4e-4f", "4f-50", "50-51", "51-52", "52-53", "53-54", "54-55", "55-56", "56-57", "57-58", "58-59", "59-5a", "5a-5b", "5b-5c", "5c-5d", "5d-5e", "5e-5f", "5f-60", "60-61", "61-62", "62-63", "63-64", "64-65", "65-66", "66-67", "67-68", "68-69", "69-6a", "6a-6b", "6b-6c", "6c-6d", "6d-6e", "6e-6f", "6f-70", "70-71", "71-72", "72-73", "73-74", "74-75", "75-76", "76-77", "77-78", "78-79", "79-7a", "7a-7b", "7b-7c", "7c-7d", "7d-7e", "7e-7f", "7f-80", "80-81", "81-82", "82-83", "83-84", "84-85", "85-86", "86-87", "87-88", "88-89", "89-8a", "8a-8b", "8b-8c", "8c-8d", "8d-8e", "8e-8f", "8f-90", "90-91", "91-92", "92-93", "93-94", "94-95", "95-96", "96-97", "97-98", "98-99", "99-9a", "9a-9b", "9b-9c", "9c-9d", "9d-9e", "9e-9f", "9f-a0", "a0-a1", "a1-a2", "a2-a3", "a3-a4", "a4-a5", "a5-a6", "a6-a7", "a7-a8", "a8-a9", "a9-aa", "aa-ab", "ab-ac", "ac-ad", "ad-ae", "ae-af", "af-b0", "b0-b1", "b1-b2", "b2-b3", "b3-b4", "b4-b5", "b5-b6", "b6-b7", "b7-b8", "b8-b9", "b9-ba", "ba-bb", "bb-bc", "bc-bd", "bd-be", "be-bf", "bf-c0", "c0-c1", "c1-c2", "c2-c3", "c3-c4", "c4-c5", "c5-c6", "c6-c7", "c7-c8", "c8-c9", "c9-ca", "ca-cb", "cb-cc", "cc-cd", "cd-ce", "ce-cf", "cf-d0", "d0-d1", "d1-d2", "d2-d3", "d3-d4", "d4-d5", "d5-d6", "d6-d7", "d7-d8", "d8-d9", "d9-da", "da-db", "db-dc", "dc-dd", 
"dd-de", "de-df", "df-e0", "e0-e1", "e1-e2", "e2-e3", "e3-e4", "e4-e5", "e5-e6", "e6-e7", "e7-e8", "e8-e9", "e9-ea", "ea-eb", "eb-ec", "ec-ed", "ed-ee", "ee-ef", "ef-f0", "f0-f1", "f1-f2", "f2-f3", "f3-f4", "f4-f5", "f5-f6", "f6-f7", "f7-f8", "f8-f9", "f9-fa", "fa-fb", "fb-fc", "fc-fd", "fd-fe", "fe-ff", "ff-"},
false,
},
{
"works for very large number of shards",
args{512},
[]string{"-0080", "0080-0100", "0100-0180", "0180-0200", "0200-0280", "0280-0300", "0300-0380", "0380-0400", "0400-0480", "0480-0500", "0500-0580", "0580-0600", "0600-0680", "0680-0700", "0700-0780", "0780-0800", "0800-0880", "0880-0900", "0900-0980", "0980-0a00", "0a00-0a80", "0a80-0b00", "0b00-0b80", "0b80-0c00", "0c00-0c80", "0c80-0d00", "0d00-0d80", "0d80-0e00", "0e00-0e80", "0e80-0f00", "0f00-0f80", "0f80-1000", "1000-1080", "1080-1100", "1100-1180", "1180-1200", "1200-1280", "1280-1300", "1300-1380", "1380-1400", "1400-1480", "1480-1500", "1500-1580", "1580-1600", "1600-1680", "1680-1700", "1700-1780", "1780-1800", "1800-1880", "1880-1900", "1900-1980", "1980-1a00", "1a00-1a80", "1a80-1b00", "1b00-1b80", "1b80-1c00", "1c00-1c80", "1c80-1d00", "1d00-1d80", "1d80-1e00", "1e00-1e80", "1e80-1f00", "1f00-1f80", "1f80-2000", "2000-2080", "2080-2100", "2100-2180", "2180-2200", "2200-2280", "2280-2300", "2300-2380", "2380-2400", "2400-2480", "2480-2500", "2500-2580", "2580-2600", "2600-2680", "2680-2700", "2700-2780", "2780-2800", "2800-2880", "2880-2900", "2900-2980", "2980-2a00", "2a00-2a80", "2a80-2b00", "2b00-2b80", "2b80-2c00", "2c00-2c80", "2c80-2d00", "2d00-2d80", "2d80-2e00", "2e00-2e80", "2e80-2f00", "2f00-2f80", "2f80-3000", "3000-3080", "3080-3100", "3100-3180", "3180-3200", "3200-3280", "3280-3300", "3300-3380", "3380-3400", "3400-3480", "3480-3500", "3500-3580", "3580-3600", "3600-3680", "3680-3700", "3700-3780", "3780-3800", "3800-3880", "3880-3900", "3900-3980", "3980-3a00", "3a00-3a80", "3a80-3b00", "3b00-3b80", "3b80-3c00", "3c00-3c80", "3c80-3d00", "3d00-3d80", "3d80-3e00", "3e00-3e80", "3e80-3f00", "3f00-3f80", "3f80-4000", "4000-4080", "4080-4100", "4100-4180", "4180-4200", "4200-4280", "4280-4300", "4300-4380", "4380-4400", "4400-4480", "4480-4500", "4500-4580", "4580-4600", "4600-4680", "4680-4700", "4700-4780", "4780-4800", "4800-4880", "4880-4900", "4900-4980", "4980-4a00", "4a00-4a80", "4a80-4b00", "4b00-4b80", "4b80-4c00", "4c00-4c80", 
"4c80-4d00", "4d00-4d80", "4d80-4e00", "4e00-4e80", "4e80-4f00", "4f00-4f80", "4f80-5000", "5000-5080", "5080-5100", "5100-5180", "5180-5200", "5200-5280", "5280-5300", "5300-5380", "5380-5400", "5400-5480", "5480-5500", "5500-5580", "5580-5600", "5600-5680", "5680-5700", "5700-5780", "5780-5800", "5800-5880", "5880-5900", "5900-5980", "5980-5a00", "5a00-5a80", "5a80-5b00", "5b00-5b80", "5b80-5c00", "5c00-5c80", "5c80-5d00", "5d00-5d80", "5d80-5e00", "5e00-5e80", "5e80-5f00", "5f00-5f80", "5f80-6000", "6000-6080", "6080-6100", "6100-6180", "6180-6200", "6200-6280", "6280-6300", "6300-6380", "6380-6400", "6400-6480", "6480-6500", "6500-6580", "6580-6600", "6600-6680", "6680-6700", "6700-6780", "6780-6800", "6800-6880", "6880-6900", "6900-6980", "6980-6a00", "6a00-6a80", "6a80-6b00", "6b00-6b80", "6b80-6c00", "6c00-6c80", "6c80-6d00", "6d00-6d80", "6d80-6e00", "6e00-6e80", "6e80-6f00", "6f00-6f80", "6f80-7000", "7000-7080", "7080-7100", "7100-7180", "7180-7200", "7200-7280", "7280-7300", "7300-7380", "7380-7400", "7400-7480", "7480-7500", "7500-7580", "7580-7600", "7600-7680", "7680-7700", "7700-7780", "7780-7800", "7800-7880", "7880-7900", "7900-7980", "7980-7a00", "7a00-7a80", "7a80-7b00", "7b00-7b80", "7b80-7c00", "7c00-7c80", "7c80-7d00", "7d00-7d80", "7d80-7e00", "7e00-7e80", "7e80-7f00", "7f00-7f80", "7f80-8000", "8000-8080", "8080-8100", "8100-8180", "8180-8200", "8200-8280", "8280-8300", "8300-8380", "8380-8400", "8400-8480", "8480-8500", "8500-8580", "8580-8600", "8600-8680", "8680-8700", "8700-8780", "8780-8800", "8800-8880", "8880-8900", "8900-8980", "8980-8a00", "8a00-8a80", "8a80-8b00", "8b00-8b80", "8b80-8c00", "8c00-8c80", "8c80-8d00", "8d00-8d80", "8d80-8e00", "8e00-8e80", "8e80-8f00", "8f00-8f80", "8f80-9000", "9000-9080", "9080-9100", "9100-9180", "9180-9200", "9200-9280", "9280-9300", "9300-9380", "9380-9400", "9400-9480", "9480-9500", "9500-9580", "9580-9600", "9600-9680", "9680-9700", "9700-9780", "9780-9800", "9800-9880", "9880-9900", 
"9900-9980", "9980-9a00", "9a00-9a80", "9a80-9b00", "9b00-9b80", "9b80-9c00", "9c00-9c80", "9c80-9d00", "9d00-9d80", "9d80-9e00", "9e00-9e80", "9e80-9f00", "9f00-9f80", "9f80-a000", "a000-a080", "a080-a100", "a100-a180", "a180-a200", "a200-a280", "a280-a300", "a300-a380", "a380-a400", "a400-a480", "a480-a500", "a500-a580", "a580-a600", "a600-a680", "a680-a700", "a700-a780", "a780-a800", "a800-a880", "a880-a900", "a900-a980", "a980-aa00", "aa00-aa80", "aa80-ab00", "ab00-ab80", "ab80-ac00", "ac00-ac80", "ac80-ad00", "ad00-ad80", "ad80-ae00", "ae00-ae80", "ae80-af00", "af00-af80", "af80-b000", "b000-b080", "b080-b100", "b100-b180", "b180-b200", "b200-b280", "b280-b300", "b300-b380", "b380-b400", "b400-b480", "b480-b500", "b500-b580", "b580-b600", "b600-b680", "b680-b700", "b700-b780", "b780-b800", "b800-b880", "b880-b900", "b900-b980", "b980-ba00", "ba00-ba80", "ba80-bb00", "bb00-bb80", "bb80-bc00", "bc00-bc80", "bc80-bd00", "bd00-bd80", "bd80-be00", "be00-be80", "be80-bf00", "bf00-bf80", "bf80-c000", "c000-c080", "c080-c100", "c100-c180", "c180-c200", "c200-c280", "c280-c300", "c300-c380", "c380-c400", "c400-c480", "c480-c500", "c500-c580", "c580-c600", "c600-c680", "c680-c700", "c700-c780", "c780-c800", "c800-c880", "c880-c900", "c900-c980", "c980-ca00", "ca00-ca80", "ca80-cb00", "cb00-cb80", "cb80-cc00", "cc00-cc80", "cc80-cd00", "cd00-cd80", "cd80-ce00", "ce00-ce80", "ce80-cf00", "cf00-cf80", "cf80-d000", "d000-d080", "d080-d100", "d100-d180", "d180-d200", "d200-d280", "d280-d300", "d300-d380", "d380-d400", "d400-d480", "d480-d500", "d500-d580", "d580-d600", "d600-d680", "d680-d700", "d700-d780", "d780-d800", "d800-d880", "d880-d900", "d900-d980", "d980-da00", "da00-da80", "da80-db00", "db00-db80", "db80-dc00", "dc00-dc80", "dc80-dd00", "dd00-dd80", "dd80-de00", "de00-de80", "de80-df00", "df00-df80", "df80-e000", "e000-e080", "e080-e100", "e100-e180", "e180-e200", "e200-e280", "e280-e300", "e300-e380", "e380-e400", "e400-e480", "e480-e500", "e500-e580", 
"e580-e600", "e600-e680", "e680-e700", "e700-e780", "e780-e800", "e800-e880", "e880-e900", "e900-e980", "e980-ea00", "ea00-ea80", "ea80-eb00", "eb00-eb80", "eb80-ec00", "ec00-ec80", "ec80-ed00", "ed00-ed80", "ed80-ee00", "ee00-ee80", "ee80-ef00", "ef00-ef80", "ef80-f000", "f000-f080", "f080-f100", "f100-f180", "f180-f200", "f200-f280", "f280-f300", "f300-f380", "f380-f400", "f400-f480", "f480-f500", "f500-f580", "f580-f600", "f600-f680", "f680-f700", "f700-f780", "f780-f800", "f800-f880", "f880-f900", "f900-f980", "f980-fa00", "fa00-fa80", "fa80-fb00", "fb00-fb80", "fb80-fc00", "fc00-fc80", "fc80-fd00", "fd00-fd80", "fd80-fe00", "fe00-fe80", "fe80-ff00", "ff00-ff80", "ff80-"},
false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := GenerateShardRanges(tt.args.shards)
got, err := GenerateShardRanges(tt.args.shards, 0)
if tt.wantErr {
assert.Error(t, err)
return
Expand All @@ -1567,7 +1573,7 @@ func TestGenerateShardRanges(t *testing.T) {
func TestGenerateShardRangesForManyShards(t *testing.T) {
for i := 1; i <= 1024; i++ {
t.Run(fmt.Sprintf("shards=%d", i), func(t *testing.T) {
ranges, err := GenerateShardRanges(i)
ranges, err := GenerateShardRanges(i, 0)

require.NoError(t, err)
require.Len(t, ranges, i)
Expand All @@ -1587,13 +1593,49 @@ func TestGenerateShardRangesForManyShards(t *testing.T) {
}
}

func TestShardCalculatorForShardsGreaterThan512(t *testing.T) {
got, err := GenerateShardRanges(512)
assert.NoError(t, err)
func TestGenerateShardRangesWithHexCharacterCount(t *testing.T) {
{
ranges, err := GenerateShardRanges(7, 1)

require.NoError(t, err)

require.EqualValues(t, 7, len(ranges))
require.EqualValues(t, []string{"-2", "2-4", "4-6", "6-9", "9-b", "b-d", "d-"}, ranges)
}

{
ranges, err := GenerateShardRanges(7, 2)

require.NoError(t, err)

require.EqualValues(t, 7, len(ranges))
require.EqualValues(t, []string{"-24", "24-49", "49-6d", "6d-92", "92-b6", "b6-db", "db-"}, ranges)
}

want := "ff80-"
{
ranges, err := GenerateShardRanges(7, 3)

assert.Equal(t, want, got[511], "Invalid mapping for a 512-shard keyspace. Expected %v, got %v", want, got[511])
require.NoError(t, err)

require.EqualValues(t, 7, len(ranges))
require.EqualValues(t, []string{"-249", "249-492", "492-6db", "6db-924", "924-b6d", "b6d-db6", "db6-"}, ranges)
}

{
ranges, err := GenerateShardRanges(7, 4)

require.NoError(t, err)

require.EqualValues(t, 7, len(ranges))
require.EqualValues(t, []string{"-2492", "2492-4924", "4924-6db6", "6db6-9249", "9249-b6db", "b6db-db6d", "db6d-"}, ranges)
}

{
_, err := GenerateShardRanges(32, 1)

require.Error(t, err)
require.ErrorContains(t, err, "the given number of shards (32) is too high for the given number of characters to use (1)")
}
}

func stringToKeyRange(spec string) *topodatapb.KeyRange {
Expand Down
4 changes: 2 additions & 2 deletions go/vt/topo/keyspace_external_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func TestServerFindAllShardsInKeyspace(t *testing.T) {
// the keyspace to fetch later.
require.NoError(t, ts.CreateKeyspace(ctx, keyspace, &topodatapb.Keyspace{}))

shards, err := key.GenerateShardRanges(tt.shards)
shards, err := key.GenerateShardRanges(tt.shards, 0)
require.NoError(t, err)

for _, s := range shards {
Expand Down Expand Up @@ -167,7 +167,7 @@ func TestServerGetServingShards(t *testing.T) {
require.NoError(t, err)
var shardNames []string
if tt.shards > 0 {
shardNames, err = key.GenerateShardRanges(tt.shards)
shardNames, err = key.GenerateShardRanges(tt.shards, 0)
require.NoError(t, err)
require.Equal(t, tt.shards, len(shardNames))
for _, shardName := range shardNames {
Expand Down
2 changes: 1 addition & 1 deletion go/vt/vtctl/vtctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -3908,7 +3908,7 @@ func commandGenerateShardRanges(ctx context.Context, wr *wrangler.Wrangler, subF
return err
}

shardRanges, err := key.GenerateShardRanges(*numShards)
shardRanges, err := key.GenerateShardRanges(*numShards, 0)
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion tools/map-shard-for-value/map-shard-for-value.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ func main() {
if *shardsCSV != "" {
log.Fatalf("cannot specify both total_shards and shards")
}
shardArr, err := key.GenerateShardRanges(*totalShards)
shardArr, err := key.GenerateShardRanges(*totalShards, 0)
if err != nil {
log.Fatalf("failed to generate shard ranges: %v", err)
}
Expand Down
Loading