1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/*
 * 127.0.0.1 in two representations:
 *  - INADDR_LOOPBACK / INADDR_LOOPBACK_HOST: the address as a host-order
 *    integer (0x7f000001).
 *  - INADDR_LOOPBACK_NET: the same four bytes (7f 00 00 01) as they read
 *    when a network-byte-order address is loaded as a u32 on a
 *    little-endian CPU. Raw skc_rcv_saddr / skc_daddr values are compared
 *    against this one.
 *    NOTE(review): this constant is only correct on little-endian targets.
 */
#define INADDR_LOOPBACK 0x7f000001 /* 127.0.0.1 */
#define INADDR_LOOPBACK_HOST INADDR_LOOPBACK
#define INADDR_LOOPBACK_NET 0x0100007f /* 127.0.0.1 */
/* Whole seconds contained in a nanosecond count (integer division, rounds down). */
#define ns2sec(ns) ((ns) / (1000 * 1000 * 1000))
/* Route memcpy() to the compiler builtin unless something already defined it as a macro. */
#ifndef memcpy
#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
#endif
/* Throttle window, in seconds, for repeated events with the same key. */
#define MERGE_SEC 10
/*
 * One toa record as the accept probe reads it out of a socket's
 * sk_user_data field (1 + 1 + 2 + 4 bytes).
 * NOTE(review): the byte order of `port` and `ip` is not established by
 * this file — confirm against whatever writes the record.
 */
typedef struct {
u8 opcode; /* compared against ipopt_toa to recognise a record */
u8 opsize; /* not read by the probe */
u16 port; /* tested for non-zero, then cleared before the event is used */
u32 ip; /* tested for non-zero together with port */
} toa_data_t;
// Normally the toa module fills in only a single toa record, so the probe
// reads an array of exactly one toa_data_t.
#define TCP_OPTION_LEN 1
/*
 * Record built by the inet_csk_accept kretprobe. The same struct is used
 * twice: first (with sec/ns still zero) as the key of tcp_event_map, then,
 * after sec/ns/raddr are filled in, as the payload written to the `events`
 * perf buffer. Changing the layout therefore changes both the map key and
 * what the perf-buffer reader receives.
 */
struct tcp_event {
u32 raddr; /* skc_daddr after bpf_ntohl(); zeroed in the map key when a toa record is present */
u32 laddr; /* skc_rcv_saddr after bpf_ntohl() */
u16 rport; /* always cleared to 0 by the probe before lookup and output */
u16 lport; /* skc_num, stored as read */
int err; /* never written by the probe; stays 0 */
u64 toa_addr; /* never written by the probe; stays 0 */
toa_data_t toa_data; /* matching toa record, with its port cleared to 0 */
u64 sec; /* whole seconds since the key was last reported, or 99 if it was not in the map */
u64 ns; /* nanoseconds spent inside the probe up to the output call */
};
typedef struct tcp_event tcp_event_t;
/*
 * Never referenced by the code in this file.
 * NOTE(review): presumably declared only so that struct tcp_event appears in
 * the object's type information for user-space tooling — confirm before removing.
 */
const struct tcp_event* unused_0x01 __attribute__((unused));
/*
 * Throttling state for the accept probe. The key is a whole tcp_event_t as
 * prepared by the probe; the value is the bpf_ktime_get_ns() reading stored
 * the last time that key produced an event. Being an LRU hash with 1024
 * entries, old keys may be evicted once the map is full.
 */
struct {
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__uint(key_size, sizeof(tcp_event_t));
__uint(value_size, sizeof(u64)); // nanosecond timestamp of the last report
__uint(max_entries, 1024);
} tcp_event_map SEC(".maps");
/*
 * Perf event array the accept probe writes tcp_event_t records to via
 * bpf_perf_event_output() with BPF_F_CURRENT_CPU.
 */
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(max_entries, 1024);
} events SEC(".maps");
/* Opcode values the probe accepts in toa_data_t.opcode. */
enum toa_type {
ipopt_toa = 254, // IPv4 client IP; currently the only kind considered
};
/* Local copies of socket constants; the probe compares skc_family with _AF_INET. */
#define _AF_INET 2 /* internetwork: UDP, TCP, etc. */
/* Not referenced by the code visible in this file. */
#define _IPPROTO_TCP 6
/*
 * kretprobe on inet_csk_accept(): reports accepted IPv4 connections to user
 * space through the `events` perf buffer, throttled per aggregation key.
 *
 * Steps:
 *  1. Take the returned socket from the probe's return register; bail out on
 *     NULL, on a non-AF_INET family, or when either end is exactly 127.0.0.1.
 *  2. Fill the event with local/remote address and port, then read the
 *     sk_user_data field of the socket as one toa_data_t and keep it if its
 *     opcode is ipopt_toa.
 *  3. Build the aggregation key: remote ports are always cleared, and the
 *     remote address is cleared too when a toa record (non-zero ip and port)
 *     is present.
 *  4. Look the key up in tcp_event_map. If it was reported no more than
 *     MERGE_SEC whole seconds ago, drop the event; otherwise store the
 *     current timestamp and emit the event.
 *
 * The lookup and update are two separate map operations, so two CPUs handling
 * the same key at the same moment may both emit.
 *
 * ctx: probe register context.
 * Returns 0 on every path.
 */
SEC("kretprobe/inet_csk_accept")
int kretprobe__inet_csk_accept(struct pt_regs* ctx) {
    u64 start_ns = bpf_ktime_get_ns();
    tcp_event_t event = {};

    struct sock* sk = (struct sock*)PT_REGS_RC(ctx);
    if (sk == NULL) {
        return 0;
    }

    /*
     * Copies the leading sizeof(struct sock_common) bytes of the socket.
     * NOTE(review): relies on struct sock_common sitting at offset 0 of
     * struct sock. If the read fails, sk_common stays zeroed and the family
     * check below rejects it.
     */
    struct sock_common sk_common = {};
    bpf_probe_read(&sk_common, sizeof(sk_common), (const void*)(sk));
    if (sk_common.skc_family != _AF_INET) {
        return 0;
    }

    // Skip loopback traffic (only the exact address 127.0.0.1 is matched).
    if (sk_common.skc_rcv_saddr == INADDR_LOOPBACK_NET ||
        sk_common.skc_daddr == INADDR_LOOPBACK_NET) {
        return 0;
    }

    event.laddr = bpf_ntohl(sk_common.skc_rcv_saddr);
    event.raddr = bpf_ntohl(sk_common.skc_daddr);
    event.lport = sk_common.skc_num;
    event.rport = bpf_ntohs(sk_common.skc_dport);

    /*
     * Reads sizeof(toa_data) bytes from the sk_user_data field itself.
     * NOTE(review): presumably the toa kernel module stores its record
     * directly in that pointer-sized field rather than behind it — confirm.
     */
    int err;
    toa_data_t toa_data[TCP_OPTION_LEN] = {};
    err = BPF_CORE_READ_INTO(&toa_data, sk, sk_user_data);
    if (err) {
        return 0;
    }

    u8 i = 0;
#pragma unroll
    for (i = 0; i < TCP_OPTION_LEN; i++) {
        if (toa_data[i].opcode != ipopt_toa) {
            continue;
        }
        memcpy(&event.toa_data, &toa_data[i], sizeof(toa_data_t));
    }

    // Remember the real remote address; it is restored before output.
    u32 raddr = event.raddr;
    if (event.toa_data.ip != 0 && event.toa_data.port != 0) {
        // Behind LVS the directly connected peer address varies, so it is
        // left out of the aggregation key as well.
        event.raddr = 0;
    }

    // Remote ports are never part of the key or the emitted event.
    event.toa_data.port = 0;
    event.rport = 0;

    u64 sec = 0;
    u64 now_ns = bpf_ktime_get_ns();
    u64* last_ns = (u64*)bpf_map_lookup_elem(&tcp_event_map, &event);
    if (last_ns != NULL) {
        /*
         * The stored timestamp can be newer than now_ns (another CPU updated
         * the entry after our clock read, or the clock stepped back
         * slightly). An unsigned subtraction would then wrap to a huge value
         * and defeat the throttle, so treat that case as zero elapsed time.
         */
        u64 prev_ns = *last_ns;
        if (now_ns > prev_ns) {
            sec = ns2sec(now_ns - prev_ns);
        }
        if (sec <= MERGE_SEC) {
            return 0;
        }
    } else {
        // Key not in the map: report with a fixed placeholder interval.
        sec = 99;
    }

    bpf_map_update_elem(&tcp_event_map, &event, &now_ns, BPF_ANY);

    // Fields below are filled in only after the key has been used.
    event.sec = sec;
    event.raddr = raddr;
    u64 end_ns = bpf_ktime_get_ns();
    event.ns = end_ns - start_ns;
    bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event));
    return 0;
}
|