Go's slice working mechanism

created at 07-28-2021 views: 3

How does slice work? First, let's start with a demo:

package main

import (
    "fmt"
)

func main() {
    a := make([]int, 10)
    fmt.Printf("%p\n", &a[0])
    for i := 0; i < 100; i++ {
        a = append(a, 1)
        fmt.Printf("%p\n", &a[0])
    }
}

Use gdb to place a breakpoint on the line a = append(a, 1) and execute:

(gdb) list
1   package main
2   
3   import (
4       "fmt"
5   )
6   
7   func main() {
8       a := make([]int, 10)
9       fmt.Printf("%p\n", &a[0])
10      for i := 0; i < 100; i++ {
(gdb) 
11          a = append(a, 1)
12          fmt.Printf("%p\n", &a[0])
13      }
14  }
(gdb) b 11
Breakpoint 1 at 0x4af7d8: file /home/jiajun/Code/test/main.go, line 11.
(gdb) run
Starting program: /home/jiajun/Code/test/test 
[New LWP 12218]
[New LWP 12219]
[New LWP 12220]
[New LWP 12221]
0xc000130000

Thread 1 "test" hit Breakpoint 1, main.main () at /home/jiajun/Code/test/main.go:11
11          a = append(a, 1)
(gdb) s
runtime.growslice (et=0x4bd560, old=..., cap=11, ~r3=...) at /snap/go/5759/src/runtime/slice.go:76
76  func growslice(et *_type, old slice, cap int) slice {
(gdb) quit

You can see that the growslice function in slice.go is called:

func growslice(et *_type, old slice, cap int) slice {
    newcap := old.cap
    doublecap := newcap + newcap
    if cap > doublecap {
        newcap = cap
    } else {
        if old.len < 1024 {
            newcap = doublecap
        } else {
            for 0 < newcap && newcap < cap {
                newcap += newcap / 4
            }
            if newcap <= 0 {
                newcap = cap
            }
        }
    }

    var overflow bool
    var lenmem, newlenmem, capmem uintptr
    switch {
    case et.size == 1:
        lenmem = uintptr(old.len)
        newlenmem = uintptr(cap)
        capmem = roundupsize(uintptr(newcap))
        overflow = uintptr(newcap) > maxAlloc
        newcap = int(capmem)
    case et.size == sys.PtrSize:
        lenmem = uintptr(old.len) * sys.PtrSize
        newlenmem = uintptr(cap) * sys.PtrSize
        capmem = roundupsize(uintptr(newcap) * sys.PtrSize)
        overflow = uintptr(newcap) > maxAlloc/sys.PtrSize
        newcap = int(capmem / sys.PtrSize)
    case isPowerOfTwo(et.size):
        var shift uintptr
        if sys.PtrSize == 8 {
            // Mask shift for better code generation.
            shift = uintptr(sys.Ctz64(uint64(et.size))) & 63
        } else {
            shift = uintptr(sys.Ctz32(uint32(et.size))) & 31
        }
        lenmem = uintptr(old.len) << shift
        newlenmem = uintptr(cap) << shift
        capmem = roundupsize(uintptr(newcap) << shift)
        overflow = uintptr(newcap) > (maxAlloc >> shift)
        newcap = int(capmem >> shift)
    default:
        lenmem = uintptr(old.len) * et.size
        newlenmem = uintptr(cap) * et.size
        capmem, overflow = math.MulUintptr(et.size, uintptr(newcap))
        capmem = roundupsize(capmem)
        newcap = int(capmem / et.size)
    }

    if overflow || capmem > maxAlloc {
        panic(errorString("growslice: cap out of range"))
    }

    var p unsafe.Pointer
    if et.ptrdata == 0 {
        p = mallocgc(capmem, nil, false)
        memclrNoHeapPointers(add(p, newlenmem), capmem-newlenmem)
    } else {
        // Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory.
        p = mallocgc(capmem, et, true)
        if lenmem > 0 && writeBarrier.enabled {
            bulkBarrierPreWriteSrcOnly(uintptr(p), uintptr(old.array), lenmem)
        }
    }
    memmove(p, old.array, lenmem)

    return slice{p, old.len, newcap}
}

The above code is the code when append is executed, but judging from the last few lines, doesn't it mean that a new piece of memory is requested every time? Let's run the first demo to see:

$ go run main.go
0xc000130000
0xc000134000
0xc000134000
0xc000134000
0xc000134000
0xc000134000
0xc000134000
0xc000134000
0xc000134000
0xc000134000
0xc000134000
...

It can be seen that the memory address typed out here is not different every time, and if you do this, the performance of append will be very low. Therefore, the growslice function will only be called when the capacity is insufficient. Usually additional values may be done directly in the assembly, let's take a look at the assembly code:

$ go tool compile -N -S main.go | grep main.go:11
    0x025e 00606 (main.go:11)   MOVQ    "".a+376(SP), DX
    0x0266 00614 (main.go:11)   LEAQ    1(DX), BX
    0x026a 00618 (main.go:11)   PCDATA  $0, $5
    0x026a 00618 (main.go:11)   MOVQ    "".a+368(SP), SI
    0x0272 00626 (main.go:11)   PCDATA  $1, $0
    0x0272 00626 (main.go:11)   MOVQ    "".a+384(SP), DI
    0x027a 00634 (main.go:11)   CMPQ    BX, DI
    0x027d 00637 (main.go:11)   JLS 644
    0x027f 00639 (main.go:11)   JMP 1167
    0x0284 00644 (main.go:11)   PCDATA  $0, $-1
    0x0284 00644 (main.go:11)   PCDATA  $1, $-1
    0x0284 00644 (main.go:11)   JMP 646
    0x0286 00646 (main.go:11)   PCDATA  $0, $5
    0x0286 00646 (main.go:11)   PCDATA  $1, $0
    0x0286 00646 (main.go:11)   MOVQ    $1, (SI)(DX*8)
    0x028e 00654 (main.go:11)   PCDATA  $1, $1
    0x028e 00654 (main.go:11)   MOVQ    SI, "".a+368(SP)
    0x0296 00662 (main.go:11)   MOVQ    BX, "".a+376(SP)
    0x029e 00670 (main.go:11)   MOVQ    DI, "".a+384(SP)
    0x048f 01167 (main.go:11)   PCDATA  $0, $5
    0x048f 01167 (main.go:11)   PCDATA  $1, $0
    0x048f 01167 (main.go:11)   MOVQ    DX, ""..autotmp_27+120(SP)
    0x0494 01172 (main.go:11)   PCDATA  $0, $6
    0x0494 01172 (main.go:11)   LEAQ    type.int(SB), AX
    0x049b 01179 (main.go:11)   PCDATA  $0, $5
    0x049b 01179 (main.go:11)   MOVQ    AX, (SP)
    0x049f 01183 (main.go:11)   PCDATA  $0, $0
    0x049f 01183 (main.go:11)   MOVQ    SI, 8(SP)
    0x04a4 01188 (main.go:11)   MOVQ    DX, 16(SP)
    0x04a9 01193 (main.go:11)   MOVQ    DI, 24(SP)
    0x04ae 01198 (main.go:11)   MOVQ    BX, 32(SP)
    0x04b3 01203 (main.go:11)   CALL    runtime.growslice(SB)
    0x04b8 01208 (main.go:11)   PCDATA  $0, $5
    0x04b8 01208 (main.go:11)   MOVQ    40(SP), SI
    0x04bd 01213 (main.go:11)   MOVQ    48(SP), AX
    0x04c2 01218 (main.go:11)   MOVQ    56(SP), DI
    0x04c7 01223 (main.go:11)   LEAQ    1(AX), BX
    0x04cb 01227 (main.go:11)   MOVQ    ""..autotmp_27+120(SP), DX
    0x04d0 01232 (main.go:11)   JMP 646

emmm, you can carefully taste this assembly code, you can see several JMP instructions, which are jump instructions. There is also the CMPQ instruction, which is a judgment instruction. Although the generated assembly code cannot be fully understood, combined with the above test results, it basically confirms our guess that the append operation is completed by assembly. Only when the capacity is insufficient, Will call the growslice function.

created at:07-28-2021
edited at: 07-28-2021: