基于ebpf实现的gls

虽然 golang 并不推荐使用 goid 来构建 gls(goroutine local storage),但仍然有很多实现并使用 gls 的尝试。github-gls 是一个常见的实现,基本表述了 golang gls 的实现思路:获取 goid,基于 goid 构建一个存储。本文中笔者尝试基于 ebpf 来构建一个 golang gls。

# 基本功能

本文中基于ebpf实现的gls具有如下功能:

  • 基于goid的存储。即map[goid]=value
  • 基于goroutine派生关系设置的value缺省值。即map[goid=1]=121,且goid=1派生goid=2,则map[goid=2]=map[goid=1]=121
    本文建议参照黑魔法-ebpf-对用户空间数据的写入进行理解。

# 用户态代码及效果

以下为用户态示例代码(Go):

package main

import (
	"context"
	"fmt"
	"strconv"
	"sync"
)

// Len is the fixed size, in bytes, of every value passed to Set and Get.
// The eBPF side of this example uses 5-byte buffers and a 5-byte map value.
var Len = 5

//  info 为空时,使用父 goid 设置的值,否则存入 info
func Go1(ctx context.Context, info string, wg *sync.WaitGroup) {
	defer wg.Done()
	third := Third{}
	if info != "" {
		third.Store(info)
	}

	/* 诸多其他的逻辑 */
    
	info1 := third.Get()
	fmt.Printf("raw info: [%s], info get: [%s]
", info, info1)
}

// Set normalises info to exactly Len bytes and prints it.
//
// Input longer than Len is cut to its first Len bytes; shorter input is
// left-padded with zero bytes. The normalised slice is only printed, it is
// not returned to the caller.
//
//go:noinline
func Set(info []byte) {
	switch n := len(info); {
	case n > Len:
		info = info[:Len]
	case n < Len:
		// Zero-filled buffer with the payload right-aligned.
		padded := make([]byte, Len)
		copy(padded[Len-n:], info)
		info = padded
	}
	fmt.Println("info: ", string(info))
}

// Get returns info unchanged as far as the Go code is concerned.
//
// NOTE(review): presumably this is the function the "uprobe/main_get" eBPF
// program attaches to, which writes the stored value into the buffer behind
// info before the body runs — confirm against the loader.
//
//go:noinline
func Get(info []byte) []byte {
	// No Go-side work here; any change to the buffer comes from outside this function.
	return info
}

// Third stands in for a third-party component that stores and fetches a
// per-goroutine value through Set and Get.
type Third struct {
	// Info is not read or written by the Store and Get methods in this file.
	Info string
}

// Store hands info to Set as a buffer of exactly Len bytes.
//
// Input longer than Len is truncated to its first Len bytes; shorter input
// (including the empty string) is zero-padded on the right. The receiver is
// not modified.
func (t *Third) Store(info string) {
	// Build the fixed-size buffer explicitly. Re-slicing []byte(info) up to
	// Len would reach past len(info) for short input, which only works when
	// the hidden capacity happens to be large enough and panics otherwise.
	infoByt := make([]byte, Len)
	copy(infoByt, info)
	Set(infoByt)
}

// Get passes a fresh zero-filled buffer of Len bytes through the
// package-level Get function and returns the result as a string.
func (t *Third) Get() string {
	buf := Get(make([]byte, Len))
	return string(buf)
}

func main() {
	third1 := Third{}
	info := "12345"
	third1.Store(info)
	wg := &sync.WaitGroup{}
	ctx := context.Background()
	wg.Add(1)
	go Go1(ctx, "", wg) // 写入空数据,预期使用父 goid 数据,即 12345
	for i := 1125; i < 1130; i++ {
		wg.Add(1)
		v := strconv.Itoa(i)
		if i%10 == 0 {
			v = ""
		}
		go Go1(ctx, v, wg)
	}
	wg.Wait()

	/* very long handle logic*/

	third2 := Third{}
	infoGet := third2.Get()
	fmt.Printf("in main, getInfo, [%s]
", infoGet)
}

执行结果为(分别在未开启、开启 bpf 的情况下运行):

// 未开启 bpf 
info:  12345
info:  1129
raw info: [1129], info get: []
info:  1126
raw info: [1126], info get: []
info:  1128
raw info: [1128], info get: []
raw info: [], info get: []
info:  1125
info:  1127
raw info: [1125], info get: []
raw info: [1127], info get: []
in main, getInfo, []

// 开启 bpf
info:  12345
info:  1129
raw info: [], info get: [12345]  // 传入空值,使用父 goid 存入数据
raw info: [1129], info get: [1129]
info:  1126
raw info: [1126], info get: [1126]
info:  1127
raw info: [1127], info get: [1127]
info:  1125
raw info: [1125], info get: [1125]
info:  1128
raw info: [1128], info get: [1128]
in main, getInfo, [12345]

上述示例对比了开启bpf前后的用户态代码输出。可以看到,当子goroutine缺少某个信息时,可以获取父goroutine的数据作为缺省值。

# 应用

这意味着我们可以在父goroutine中存入需要的数据,此后无论是否创建新的goroutine,都能在子goroutine中获取该信息,从而维护goroutine session级别的数据。

# ebpf 逻辑

这里仍然附上了ebpf的主要逻辑以便说明实现过程。除了之前文章中涉及的ebpf向用户态写入数据,本文使用了golang创建goroutine相关的uprobe来维护goroutine session状态。

以下为 eBPF 程序的主要代码(C):

// go_goid_map carries the parent goroutine id from the newproc1 probe to the
// runqput probe: uprobe__golang_runtime_newproc1 writes it and
// uprobe__golang_runtime_runqput reads and deletes it.
struct {
  __uint(type, BPF_MAP_TYPE_LRU_HASH);
  __uint(key_size, sizeof(u64));   // key: pid_tgid of the calling thread
  __uint(value_size, sizeof(u64)); // value: parent goid
  __uint(max_entries, MAX_ENTRIES);
} go_goid_map SEC(".maps");  // used to look up goid state

// info_map holds the stored value for each goroutine: an 8-byte goid key
// mapped to a 5-byte payload, with room for 100 entries.
struct {
  __uint(type, BPF_MAP_TYPE_HASH);
  __uint(key_size, sizeof(u64));
  __uint(value_size, sizeof(u8) * 5);
  __uint(max_entries, 100);
} info_map SEC(".maps");  // stores goid -> info

// events is the perf event array that the main_set and main_get probes push
// event_t records into via bpf_perf_event_output.
// NOTE(review): no key/value size or max_entries is declared here;
// presumably the loader fills them in — confirm.
struct {
  __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
} events SEC(".maps");

// get_goid resolves the id of the goroutine running on the current thread.
//
// It follows task_struct -> thread.fsbase -> g -> goid using fixed offsets
// (OFF_TASK_THRD, OFF_THRD_FSBASE, GOID_OFFSET) rather than type information.
// The original notes list offset(task_struct, thread) = 4992 and
// offset(thread_struct, fsbase) = 40.
// NOTE(review): those values are presumably specific to one kernel build —
// confirm before running on another kernel.
//
// tgid and pid are accepted but not used.
//
// Returns the goid, or 0 as soon as any read fails or the g pointer is NULL.
static __always_inline u64 get_goid(u32 tgid, u32 pid) {
  unsigned long task_addr = (unsigned long)bpf_get_current_task();
  unsigned long fsbase    = 0;
  unsigned long g         = 0;
  u64           goid      = 0;

  // FS base of the current task, read out of its task_struct.
  if (bpf_probe_read(&fsbase, sizeof(fsbase),
                     (void *)(task_addr + OFF_TASK_THRD + OFF_THRD_FSBASE)) != 0) {
    return 0;
  }

  // The g pointer is taken from the 8 bytes just below the FS base.
  // NOTE(review): presumably the Go runtime's TLS slot on linux/amd64 — confirm.
  if (bpf_probe_read(&g, sizeof(g), (void *)(fsbase - 8)) != 0 || g == 0) {
    return 0;
  }

  if (bpf_probe_read(&goid, sizeof(goid), (void *)(g + GOID_OFFSET)) != 0) {
    return 0;
  }
  return goid;
}

// uprobe__main_set records the 5-byte value passed to the probed function
// under the current goroutine's id.
//
// It reads argument 0 (via SARG) as a pointer to the payload, copies 5 bytes
// from it into info_map[goid], and emits an event_t with the thread's
// pid_tgid and the payload. Nothing is stored or emitted when the goroutine
// id cannot be resolved or the payload cannot be read. Always returns 0.
SEC("uprobe/main_set")
int uprobe__main_set(struct pt_regs *ctx) {
  uintptr_t info_p   = 0;
  u8        info[5]  = {0};
  u64       pid_tgid = bpf_get_current_pid_tgid();
  // pid_tgid packs the tgid in the upper 32 bits and the thread id in the
  // lower 32 bits, so the thread id is the full lower word, not one byte.
  u32       pid      = (u32)pid_tgid;
  u32       tgid     = (u32)(pid_tgid >> 32);
  u64       goid     = 0;

  goid = get_goid(tgid, pid);
  if (goid == 0) {
    // get_goid yields 0 when it cannot resolve the goroutine; keying the map
    // on 0 would mix values from unrelated callers.
    return 0;
  }
  SARG(ctx, 0, info_p);

  if (bpf_probe_read(&info, sizeof(info), (const void *)info_p) != 0) {
    // Do not store or report a payload that could not be read.
    return 0;
  }
  bpf_map_update_elem(&info_map, &goid, &info, BPF_ANY);

  event_t event  = {};
  event.pid_tgid = pid_tgid;
  memcpy(event.info, info, sizeof(info));
  bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event));

  return 0;
}

// uprobe__main_get writes the value stored for the current goroutine into
// the user buffer passed to the probed function.
//
// It looks up info_map[goid]; when an entry exists it copies the 5-byte
// payload to the address in argument 0 (via SARG) with bpf_probe_write_user
// and emits an event_t with pid_tgid, the payload, the write result and the
// target address. Does nothing when the goroutine id cannot be resolved or
// no value is stored. Always returns 0.
SEC("uprobe/main_get")
int uprobe__main_get(struct pt_regs *ctx) {
  uintptr_t info_p   = 0;
  u64       pid_tgid = bpf_get_current_pid_tgid();
  // pid_tgid packs the tgid in the upper 32 bits and the thread id in the
  // lower 32 bits, so the thread id is the full lower word, not one byte.
  u32       pid      = (u32)pid_tgid;
  u32       tgid     = (u32)(pid_tgid >> 32);
  u64       goid     = 0;

  goid = get_goid(tgid, pid);
  if (goid == 0) {
    // get_goid yields 0 when it cannot resolve the goroutine.
    return 0;
  }

  void *r_info_p = NULL;
  r_info_p       = bpf_map_lookup_elem(&info_map, &goid);
  if (r_info_p == NULL) {
    return 0;
  }
  event_t event  = {};
  event.pid_tgid = pid_tgid;

  SARG(ctx, 0, info_p);

  // Stage the map value on the stack before handing it to the helpers.
  u8 info[5];
  memcpy(info, r_info_p, sizeof(info));

  // Copy exactly the payload size, as uprobe__main_set does, so the copy can
  // never read past the 5-byte stack buffer.
  // NOTE(review): assumes event.info holds at least 5 bytes — confirm in event_t.
  memcpy(event.info, info, sizeof(info));

  event.res  = bpf_probe_write_user((u8 *)info_p, info, sizeof(info));
  event.addr = info_p;

  bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event));
  return 0;
}

/* golang_runtime_newproc1
   func newproc1(fn *funcval, argp unsafe.Pointer, narg int32, callergp *g,
 callerpc uintptr) *g {

   Remembers, per calling thread, the goid of the goroutine that is creating
   a new goroutine. Argument 3 (callergp) is fetched with SARG, its goid is
   read at GOID_OFFSET, and the result is stored in go_goid_map under the
   thread's pid_tgid. Always returns 0.
*/
SEC("uprobe/golang_runtime_newproc1")
int uprobe__golang_runtime_newproc1(struct pt_regs *ctx) {
  uintptr_t caller_g    = 0;
  u64       parent_goid = 0;
  u64       thread_key  = bpf_get_current_pid_tgid();

  // callergp is the fourth parameter, i.e. index 3.
  SARG(ctx, 3, caller_g);
  bpf_probe_read(&parent_goid, sizeof(parent_goid),
                 (void *)(caller_g + GOID_OFFSET));

  bpf_map_update_elem(&go_goid_map, &thread_key, &parent_goid, BPF_ANY);
  return 0;
}

// uprobe__golang_runtime_runqput gives a newly created goroutine its
// parent's stored value as a default.
//
// It consumes the parent goid that the newproc1 probe left in go_goid_map
// for this thread, reads the goid of the g passed as argument 1 (via SARG),
// and copies info_map[parent] to info_map[child] when the parent has a
// value. Always returns 0.
SEC("uprobe/golang_runtime_runqput")
int uprobe__golang_runtime_runqput(struct pt_regs *ctx) {
  u64       pid_tgid    = bpf_get_current_pid_tgid();
  uintptr_t g_addr      = 0;
  u64      *parent_goid = NULL;
  u64       parent      = 0;
  u64       child_goid  = 0;
  void     *v_p         = NULL;

  parent_goid = bpf_map_lookup_elem(&go_goid_map, &pid_tgid);
  if (parent_goid == NULL) {
    return 0;
  }

  // Take a private copy, then drop the per-thread entry on every path. If it
  // were left behind when the parent has no value, a later runqput on this
  // thread could treat an unrelated goroutine as this parent's child.
  parent = *parent_goid;
  bpf_map_delete_elem(&go_goid_map, &pid_tgid);

  // 1. Read the goid of the new goroutine.
  SARG(ctx, 1, g_addr);
  if (bpf_probe_read(&child_goid, sizeof(child_goid),
                     (void *)(g_addr + GOID_OFFSET)) != 0 ||
      child_goid == 0) {
    // Without a valid child goid there is no key to store under.
    return 0;
  }

  // 2. Look up the value bound to the creating goroutine.
  v_p = bpf_map_lookup_elem(&info_map, &parent);
  if (v_p == NULL) {
    return 0;
  }
  // Bind the creator's value to the child goid.
  bpf_map_update_elem(&info_map, &child_goid, v_p, BPF_ANY);

  return 0;
}

以上。

Hello, World!
使用 Hugo 构建
主题 StackJimmy 设计