2.1 字符串高效拼接

Kesa...大约 4 分钟golang

1. String

Golang 中的 string 是只读类型,字符串的拼接实际上会创建新字符串再将内容进行拷贝,若存在大量的拼接操作,会对性能产生严重影响。

1.1 拼接方式

首先定义一个随机生成字符串的函数:

// letterBytes is the alphabet randomString draws from: the 52 ASCII letters.
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

// randomString returns a string of n bytes, each picked independently from
// letterBytes using the package-level math/rand generator.
func randomString(n int) string {
	out := make([]byte, n)
	for pos := 0; pos < n; pos++ {
		pick := rand.Intn(len(letterBytes))
		out[pos] = letterBytes[pick]
	}
	return string(out)
}

// 备注:原书使用固定的 byte 数组
// 我偏向于使用动态计算

// getLetter returns a lowercase ASCII letter derived from seed.
//
// A fresh generator is created from seed on every call and only its first
// value is used, so the result is fully determined by seed.
func getLetter(seed int64) byte {
	src := rand.NewSource(seed)
	offset := rand.New(src).Intn(26)
	return byte('a' + offset)
}

// genStr returns a pseudo-random string of n lowercase ASCII letters.
//
// The output is reproducible: the same (n, seed) pair always yields the same
// string. A negative n panics when the buffer is allocated.
func genStr(n int, seed int64) string {
	// Seed one generator up front and keep drawing from it. Creating a new
	// generator from the same seed for every byte restarts the sequence each
	// time, which yields one letter repeated n times (and allocates a new
	// source per byte).
	r := rand.New(rand.NewSource(seed))

	buf := make([]byte, n)
	for i := range buf {
		buf[i] = byte('a' + r.Intn(26))
	}

	// Reinterpret the slice header as a string header to skip the copy that
	// string(buf) would make. buf is not modified after this point.
	return *(*string)(unsafe.Pointer(&buf))
}

使用 +

将传入的字符串拼接 n 次

// concatStrUsingAdd returns str repeated n times, built with the + operator.
//
// It deliberately uses += in a loop: the function exists to measure that
// strategy, where every iteration allocates a new string and copies the
// accumulated content. For n <= 0 it returns the empty string.
func concatStrUsingAdd(n int, str string) string {
	// Accumulate into a separate result. Appending str to itself would double
	// its length on every iteration and produce 2^n copies instead of n.
	var res string
	for i := 0; i < n; i++ {
		res += str
	}
	return res
}

使用 fmt.Sprintf

// concatStrUsingSprintf returns str repeated n times, appending one copy per
// iteration by formatting the accumulated result and str through fmt.Sprintf.
// For n <= 0 it returns the empty string.
func concatStrUsingSprintf(n int, str string) string {
	acc := ""
	for remaining := n; remaining > 0; remaining-- {
		acc = fmt.Sprintf("%s%s", acc, str)
	}
	return acc
}

使用 strings.Builder

// concatStrUsingStringsBuilder returns str repeated n times, written into a
// strings.Builder whose capacity is reserved before the first write.
func concatStrUsingStringsBuilder(n int, str string) string {
	var out strings.Builder

	// Reserve the full result size up front to cut down on allocations.
	total := n * len(str)
	out.Grow(total)

	for remaining := n; remaining > 0; remaining-- {
		out.WriteString(str)
	}
	return out.String()
}

使用 bytes.Buffer

// concatStrUsingBuffer returns str repeated n times, written into a
// bytes.Buffer whose capacity is reserved before the first write.
func concatStrUsingBuffer(n int, str string) string {
	var out bytes.Buffer

	// Reserve the full result size up front to cut down on allocations.
	total := n * len(str)
	out.Grow(total)

	for remaining := n; remaining > 0; remaining-- {
		out.WriteString(str)
	}
	return out.String()
}

使用 []byte

// concatStrUsingByteSlice returns str repeated n times, appended into a byte
// slice whose capacity is reserved up front.
func concatStrUsingByteSlice(n int, str string) string {
	// Reserve the full result size up front to cut down on allocations.
	out := make([]byte, 0, n*len(str))
	for remaining := n; remaining > 0; remaining-- {
		out = append(out, str...)
	}

	// Reinterpret the slice header as a string header instead of converting
	// with string(out); out is not modified after this point.
	return *(*string)(unsafe.Pointer(&out))
}

1.2. Benchmark

// benchmark is the shared driver for the per-strategy benchmarks: it builds
// one 10-byte random input and calls f with a repeat count of 10000 on every
// benchmark iteration. The value returned by f is discarded.
func benchmark(b *testing.B, f func(int, string) string) {
	const (
		inputLen = 10
		repeat   = 10000
	)

	input := randomString(inputLen)
	for iter := 0; iter < b.N; iter++ {
		f(repeat, input)
	}
}

// Per-strategy benchmark entry points. Each one hands a single concatenation
// function to the shared benchmark driver.
//
// NOTE(review): plusConcat, sprintfConcat, builderConcat, bufferConcat,
// byteConcat and preByteConcat are not defined in the visible source; they
// presumably belong to the referenced original book's code — confirm.

// BenchmarkPlusConcat runs the shared driver with plusConcat.
func BenchmarkPlusConcat(b *testing.B) {
	benchmark(b, plusConcat)
}

// BenchmarkSprintfConcat runs the shared driver with sprintfConcat.
func BenchmarkSprintfConcat(b *testing.B) {
	benchmark(b, sprintfConcat)
}

// BenchmarkBuilderConcat runs the shared driver with builderConcat.
func BenchmarkBuilderConcat(b *testing.B) {
	benchmark(b, builderConcat)
}

// BenchmarkBufferConcat runs the shared driver with bufferConcat.
func BenchmarkBufferConcat(b *testing.B) {
	benchmark(b, bufferConcat)
}

// BenchmarkByteConcat runs the shared driver with byteConcat.
func BenchmarkByteConcat(b *testing.B) {
	benchmark(b, byteConcat)
}

// BenchmarkPreByteConcat runs the shared driver with preByteConcat.
func BenchmarkPreByteConcat(b *testing.B) {
	benchmark(b, preByteConcat)
}

// 备注:原书使用多个函数单独测试
// 我偏向于使用单个函数测试不同形式

// BenchmarkStrConcat measures every concatenation strategy at several repeat
// counts inside one table-driven benchmark.
//
// Each (strategy, count) pair runs as its own sub-benchmark named
// "<strategy>_<count>", with the strategy name left-aligned to 20 characters
// and the count in scientific notation. The input is a 10-byte string from
// genStr with a fixed seed, built before the timer is reset so that setup is
// not measured.
func BenchmarkStrConcat(b *testing.B) {
	const (
		strLen       = 10
		seed   int64 = 47
	)

	type variant struct {
		name   string
		concat func(int, string) string
	}
	variants := []variant{
		{"UsingAddOperator", concatStrUsingAdd},
		{"UsingSprintf", concatStrUsingSprintf},
		{"UsingStringsBuilder", concatStrUsingStringsBuilder},
		{"UsingBuffer", concatStrUsingBuffer},
		{"UsingByteSlice", concatStrUsingByteSlice},
	}

	// Repeat counts: zero, then ten through one hundred thousand in powers
	// of ten.
	counts := []int{0, 10, 100, 1000, 10000, 100000}

	for _, count := range counts {
		for _, v := range variants {
			label := fmt.Sprintf("%-20s_%.0e", v.name, float64(count))
			b.Run(label, func(b *testing.B) {
				input := genStr(strLen, seed)
				b.ResetTimer()
				for i := 0; i < b.N; i++ {
					v.concat(count, input)
				}
			})
		}
	}
}
BenchmarkStrConcat/UsingAddOperator_____0e+00-12                494569014                2.250 ns/op           0 B/op          0 allocs/op
BenchmarkStrConcat/UsingSprintf_________0e+00-12                486077326                2.260 ns/op           0 B/op          0 allocs/op
BenchmarkStrConcat/UsingStringsBuilder__0e+00-12                326929956                3.748 ns/op           0 B/op          0 allocs/op
BenchmarkStrConcat/UsingBuffer__________0e+00-12                185873576                6.491 ns/op           0 B/op          0 allocs/op
BenchmarkStrConcat/UsingByteSlice_______0e+00-12                195677515                6.060 ns/op           0 B/op          0 allocs/op
BenchmarkStrConcat/UsingAddOperator_____1e+01-12                 2450439               519.6 ns/op           600 B/op          9 allocs/op
BenchmarkStrConcat/UsingSprintf_________1e+01-12                  686652              1883 ns/op             920 B/op         29 allocs/op
BenchmarkStrConcat/UsingStringsBuilder__1e+01-12                11477025                90.15 ns/op          112 B/op          1 allocs/op
BenchmarkStrConcat/UsingBuffer__________1e+01-12                 7096644               168.8 ns/op           224 B/op          2 allocs/op
BenchmarkStrConcat/UsingByteSlice_______1e+01-12                14244270                84.42 ns/op          112 B/op          1 allocs/op
BenchmarkStrConcat/UsingAddOperator_____1e+02-12                   51835             20830 ns/op           53480 B/op         99 allocs/op
BenchmarkStrConcat/UsingSprintf_________1e+02-12                   32902             33453 ns/op           56723 B/op        299 allocs/op
BenchmarkStrConcat/UsingStringsBuilder__1e+02-12                 1580265               686.8 ns/op          1024 B/op          1 allocs/op
BenchmarkStrConcat/UsingBuffer__________1e+02-12                  959354              1191 ns/op            2048 B/op          2 allocs/op
BenchmarkStrConcat/UsingByteSlice_______1e+02-12                 1882011               648.2 ns/op          1024 B/op          1 allocs/op
BenchmarkStrConcat/UsingAddOperator_____1e+03-12                     864           1603599 ns/op         5320833 B/op        999 allocs/op
BenchmarkStrConcat/UsingSprintf_________1e+03-12                     784           1734681 ns/op         5366883 B/op       3004 allocs/op
BenchmarkStrConcat/UsingStringsBuilder__1e+03-12                  201326              6323 ns/op           10240 B/op          1 allocs/op
BenchmarkStrConcat/UsingBuffer__________1e+03-12                  122440              9516 ns/op           20480 B/op          2 allocs/op
BenchmarkStrConcat/UsingByteSlice_______1e+03-12                  199574              6268 ns/op           10240 B/op          1 allocs/op
BenchmarkStrConcat/UsingAddOperator_____1e+04-12                       8         152216050 ns/op        530997617 B/op     10021 allocs/op
BenchmarkStrConcat/UsingSprintf_________1e+04-12                       6         216692783 ns/op        832781652 B/op     33934 allocs/op
BenchmarkStrConcat/UsingStringsBuilder__1e+04-12                   21248             69425 ns/op          106496 B/op          1 allocs/op
BenchmarkStrConcat/UsingBuffer__________1e+04-12                   10000            114727 ns/op          212993 B/op          2 allocs/op
BenchmarkStrConcat/UsingByteSlice_______1e+04-12                   20474             62842 ns/op          106496 B/op          1 allocs/op
BenchmarkStrConcat/UsingAddOperator_____1e+05-12                       1        13100535200 ns/op       50399804616 B/op          101460 allocs/op
BenchmarkStrConcat/UsingSprintf_________1e+05-12                       1        28859776900 ns/op       100690781320 B/op         452724 allocs/op
BenchmarkStrConcat/UsingStringsBuilder__1e+05-12                    1777            639024 ns/op         1007621 B/op          1 allocs/op
BenchmarkStrConcat/UsingBuffer__________1e+05-12                    1435            862125 ns/op         2015235 B/op          2 allocs/op
BenchmarkStrConcat/UsingByteSlice_______1e+05-12                    2058            585812 ns/op         1007622 B/op          1 allocs/op

可以看出性能最好的是使用 []byte 和 strings.Builder

1.3 推荐使用 strings.Builder

综合易用性和性能,使用 strings.Builder 拼接字符串是最好的选择。

A Builder is used to efficiently build a string using Write methods. It minimizes memory copying.

使用时若预先知道结果的长度,使用 Grow 预先分配内存,可以进一步提高性能,减少内存分配次数。

2. 原理

2.1 +

因 Golang 中的 string 是不可变的,使用 + 拼接时会重新申请新的内存空间,然后将拼接后的内容拷贝过去。

拼接的过程中所需的内存空间会不断增加和累积,大量的拼接操作将导致性能急剧下降。

2.2 strings.Builder 和 bytes.Buffer

两者的底层都使用 []byte 存储,但是最后转换成 string 时方式不同:

// strings.Builder
//
// String returns the accumulated content as a string. The result is built
// with unsafe.String directly over the backing array of b.buf, so the bytes
// are not copied.
func (b *Builder) String() string {
	return unsafe.String(unsafe.SliceData(b.buf), len(b.buf))
}

// bytes.Buffer
//
// concatStrUsingBuffer returns str repeated n times, written into a
// bytes.Buffer whose capacity is reserved before the first write.
//
// NOTE(review): this repeats a function that already appears earlier in the
// file, while the surrounding section compares how the two types produce
// their final string. The String method of bytes.Buffer was presumably meant
// to be shown here instead — confirm.
func concatStrUsingBuffer(n int, str string) string {
	var out bytes.Buffer

	// Reserve the full result size up front to cut down on allocations.
	total := n * len(str)
	out.Grow(total)

	for remaining := n; remaining > 0; remaining-- {
		out.WriteString(str)
	}
	return out.String()
}
  • strings.Builder:通过 unsafe.String 直接复用底层字节数组来构造字符串,不发生内存拷贝
  • bytes.Buffer:其 String() 方法使用显式的类型转换(return string(b.buf[b.off:])),会重新申请内存并将字节数组内容拷贝过去

Reference

  1. https://geektutu.com/post/hpg-string-concat.html
上次编辑于:
评论
  • 按正序
  • 按倒序
  • 按热度
Powered by Waline v2.15.2