bpf: sockmap sample program

This program binds a program to a cgroup and then matches hard
coded IP addresses and adds these to a sockmap.

This will receive messages from the backend and send them to
the client.

     client:X <---> frontend:10000 client:X <---> backend:10001

To keep things simple this is only designed for 1:1 connections
using hard coded values. A more complete example would allow many
backends and clients.

To run,

 # sockmap <cgroup2_dir>

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
John Fastabend 2017-08-15 22:33:32 -07:00 committed by David S. Miller
parent 8a31db5615
commit 69e8cc134b
8 changed files with 547 additions and 6 deletions

View file

@ -65,6 +65,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
bool is_sockops = strncmp(event, "sockops", 7) == 0;
bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
size_t insns_cnt = size / sizeof(struct bpf_insn);
enum bpf_prog_type prog_type;
char buf[256];
@ -92,6 +93,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
} else if (is_sockops) {
prog_type = BPF_PROG_TYPE_SOCK_OPS;
} else if (is_sk_skb) {
prog_type = BPF_PROG_TYPE_SK_SKB;
} else {
printf("Unknown event '%s'\n", event);
return -1;
@ -109,7 +112,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
return 0;
if (is_socket || is_sockops) {
if (is_socket || is_sockops || is_sk_skb) {
if (is_socket)
event += 6;
else
@ -567,7 +570,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
memcmp(shname, "perf_event", 10) == 0 ||
memcmp(shname, "socket", 6) == 0 ||
memcmp(shname, "cgroup/", 7) == 0 ||
memcmp(shname, "sockops", 7) == 0) {
memcmp(shname, "sockops", 7) == 0 ||
memcmp(shname, "sk_skb", 6) == 0) {
ret = load_and_attach(shname, data->d_buf,
data->d_size);
if (ret != 0)

78
samples/sockmap/Makefile Normal file
View file

@ -0,0 +1,78 @@
# kbuild trick to avoid linker error. Can be omitted if a module is built.
obj- := dummy.o
# List of programs to build
hostprogs-y := sockmap
# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
HOSTCFLAGS += -I$(srctree)/tools/perf
sockmap-objs := ../bpf/bpf_load.o $(LIBBPF) sockmap_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
always += sockmap_kern.o
HOSTLOADLIBES_sockmap += -lelf -lpthread
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
LLC ?= llc
CLANG ?= clang
# Trick to allow make to be run from this directory
all:
$(MAKE) -C ../../ $(CURDIR)/
clean:
$(MAKE) -C ../../ M=$(CURDIR) clean
@rm -f *~
$(obj)/syscall_nrs.s: $(src)/syscall_nrs.c
$(call if_changed_dep,cc_s_c)
$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE
$(call filechk,offsets,__SYSCALL_NRS_H__)
clean-files += syscall_nrs.h
FORCE:
# Verify LLVM compiler tools are available and bpf target is supported by llc
.PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC)
verify_cmds: $(CLANG) $(LLC)
@for TOOL in $^ ; do \
if ! (which -- "$${TOOL}" > /dev/null 2>&1); then \
echo "*** ERROR: Cannot find LLVM tool $${TOOL}" ;\
exit 1; \
else true; fi; \
done
verify_target_bpf: verify_cmds
@if ! (${LLC} -march=bpf -mattr=help > /dev/null 2>&1); then \
echo "*** ERROR: LLVM (${LLC}) does not support 'bpf' target" ;\
echo " NOTICE: LLVM version >= 3.7.1 required" ;\
exit 2; \
else true; fi
$(src)/*.c: verify_target_bpf
# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
# But, there is no easy way to fix it, so just exclude it since it is
# useless for BPF samples.
$(obj)/%.o: $(src)/%.c
$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
-D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
-Wno-compare-distinct-pointer-types \
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
-Wno-unknown-warning-option \
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@

View file

@ -0,0 +1,110 @@
/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <uapi/linux/bpf.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include "../../tools/testing/selftests/bpf/bpf_helpers.h"
#include "../../tools/testing/selftests/bpf/bpf_endian.h"
/* Sockmap sample program connects a client and a backend together
* using cgroups.
*
* client:X <---> frontend:80 client:X <---> backend:80
*
* For simplicity we hard code values here and bind 1:1. The hard
* coded values are part of the setup in sockmap.sh script that
* is associated with this BPF program.
*
* The bpf_printk is verbose and prints information as connections
* are established and verdicts are decided.
*/
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
struct bpf_map_def SEC("maps") sock_map = {
.type = BPF_MAP_TYPE_SOCKMAP,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 20,
};
SEC("sk_skb1")
int bpf_prog1(struct __sk_buff *skb)
{
return skb->len;
}
SEC("sk_skb2")
int bpf_prog2(struct __sk_buff *skb)
{
__u32 lport = skb->local_port;
__u32 rport = skb->remote_port;
int ret = 0;
if (lport == 10000)
ret = 10;
else
ret = 1;
bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
return bpf_sk_redirect_map(&sock_map, ret, 0);
}
SEC("sockops")
int bpf_sockmap(struct bpf_sock_ops *skops)
{
__u32 lport, rport;
int op, err = 0, index, key, ret;
op = (int) skops->op;
switch (op) {
case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
lport = skops->local_port;
rport = skops->remote_port;
if (lport == 10000) {
ret = 1;
err = bpf_sock_map_update(skops, &sock_map, &ret,
BPF_NOEXIST,
BPF_SOCKMAP_STRPARSER);
bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
lport, bpf_ntohl(rport), err);
}
break;
case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
lport = skops->local_port;
rport = skops->remote_port;
if (bpf_ntohl(rport) == 10001) {
ret = 10;
err = bpf_sock_map_update(skops, &sock_map, &ret,
BPF_NOEXIST,
BPF_SOCKMAP_STRPARSER);
bpf_printk("active(%i -> %i) map ctx update err: %d\n",
lport, bpf_ntohl(rport), err);
}
break;
default:
break;
}
return 0;
}
char _license[] SEC("license") = "GPL";

View file

@ -0,0 +1,286 @@
/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/select.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <stdbool.h>
#include <signal.h>
#include <fcntl.h>
#include <sys/time.h>
#include <sys/types.h>
#include <linux/netlink.h>
#include <linux/socket.h>
#include <linux/sock_diag.h>
#include <linux/bpf.h>
#include <linux/if_link.h>
#include <assert.h>
#include <libgen.h>
#include "../bpf/bpf_load.h"
#include "../bpf/bpf_util.h"
#include "../bpf/libbpf.h"
int running;
void running_handler(int a);
/* randomly selected ports for testing on lo */
#define S1_PORT 10000
#define S2_PORT 10001
static int sockmap_test_sockets(int rate, int dot)
{
int i, sc, err, max_fd, one = 1;
int s1, s2, c1, c2, p1, p2;
struct sockaddr_in addr;
struct timeval timeout;
char buf[1024] = {0};
int *fds[4] = {&s1, &s2, &c1, &c2};
fd_set w;
s1 = s2 = p1 = p2 = c1 = c2 = 0;
/* Init sockets */
for (i = 0; i < 4; i++) {
*fds[i] = socket(AF_INET, SOCK_STREAM, 0);
if (*fds[i] < 0) {
perror("socket s1 failed()");
err = *fds[i];
goto out;
}
}
/* Allow reuse */
for (i = 0; i < 2; i++) {
err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
(char *)&one, sizeof(one));
if (err) {
perror("setsockopt failed()");
goto out;
}
}
/* Non-blocking sockets */
for (i = 0; i < 4; i++) {
err = ioctl(*fds[i], FIONBIO, (char *)&one);
if (err < 0) {
perror("ioctl s1 failed()");
goto out;
}
}
/* Bind server sockets */
memset(&addr, 0, sizeof(struct sockaddr_in));
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = inet_addr("127.0.0.1");
addr.sin_port = htons(S1_PORT);
err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0) {
perror("bind s1 failed()\n");
goto out;
}
addr.sin_port = htons(S2_PORT);
err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0) {
perror("bind s2 failed()\n");
goto out;
}
/* Listen server sockets */
addr.sin_port = htons(S1_PORT);
err = listen(s1, 32);
if (err < 0) {
perror("listen s1 failed()\n");
goto out;
}
addr.sin_port = htons(S2_PORT);
err = listen(s2, 32);
if (err < 0) {
perror("listen s1 failed()\n");
goto out;
}
/* Initiate Connect */
addr.sin_port = htons(S1_PORT);
err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0 && errno != EINPROGRESS) {
perror("connect c1 failed()\n");
goto out;
}
addr.sin_port = htons(S2_PORT);
err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0 && errno != EINPROGRESS) {
perror("connect c2 failed()\n");
goto out;
}
/* Accept Connecrtions */
p1 = accept(s1, NULL, NULL);
if (p1 < 0) {
perror("accept s1 failed()\n");
goto out;
}
p2 = accept(s2, NULL, NULL);
if (p2 < 0) {
perror("accept s1 failed()\n");
goto out;
}
max_fd = p2;
timeout.tv_sec = 10;
timeout.tv_usec = 0;
printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
c1, s1, c2, s2);
/* Ping/Pong data from client to server */
sc = send(c1, buf, sizeof(buf), 0);
if (sc < 0) {
perror("send failed()\n");
goto out;
}
do {
int s, rc, i;
/* FD sets */
FD_ZERO(&w);
FD_SET(c1, &w);
FD_SET(c2, &w);
FD_SET(p1, &w);
FD_SET(p2, &w);
s = select(max_fd + 1, &w, NULL, NULL, &timeout);
if (s == -1) {
perror("select()");
break;
} else if (!s) {
fprintf(stderr, "unexpected timeout\n");
break;
}
for (i = 0; i <= max_fd && s > 0; ++i) {
if (!FD_ISSET(i, &w))
continue;
s--;
rc = recv(i, buf, sizeof(buf), 0);
if (rc < 0) {
if (errno != EWOULDBLOCK) {
perror("recv failed()\n");
break;
}
}
if (rc == 0) {
close(i);
break;
}
sc = send(i, buf, rc, 0);
if (sc < 0) {
perror("send failed()\n");
break;
}
}
sleep(rate);
if (dot) {
printf(".");
fflush(stdout);
}
} while (running);
out:
close(s1);
close(s2);
close(p1);
close(p2);
close(c1);
close(c2);
return err;
}
int main(int argc, char **argv)
{
int rate = 1, dot = 1;
char filename[256];
int err, cg_fd;
char *cg_path;
cg_path = argv[argc - 1];
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
running = 1;
/* catch SIGINT */
signal(SIGINT, running_handler);
if (load_bpf_file(filename)) {
fprintf(stderr, "load_bpf_file: (%s) %s\n",
filename, strerror(errno));
return 1;
}
/* Cgroup configuration */
cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY);
if (cg_fd < 0) {
fprintf(stderr, "ERROR: (%i) open cg path failed: %s\n",
cg_fd, cg_path);
return cg_fd;
}
/* Attach programs to sockmap */
err = __bpf_prog_attach(prog_fd[0], prog_fd[1], map_fd[0],
BPF_CGROUP_SMAP_INGRESS, 0);
if (err) {
fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
err, strerror(errno));
return err;
}
/* Attach to cgroups */
err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
if (err) {
fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
err, strerror(errno));
return err;
}
err = sockmap_test_sockets(rate, dot);
if (err) {
fprintf(stderr, "ERROR: test socket failed: %d\n", err);
return err;
}
return 0;
}
void running_handler(int a)
{
running = 0;
}

View file

@ -110,6 +110,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_ARRAY_OF_MAPS,
BPF_MAP_TYPE_HASH_OF_MAPS,
BPF_MAP_TYPE_DEVMAP,
BPF_MAP_TYPE_SOCKMAP,
};
enum bpf_prog_type {
@ -127,6 +128,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LWT_OUT,
BPF_PROG_TYPE_LWT_XMIT,
BPF_PROG_TYPE_SOCK_OPS,
BPF_PROG_TYPE_SK_SKB,
};
enum bpf_attach_type {
@ -134,11 +136,18 @@ enum bpf_attach_type {
BPF_CGROUP_INET_EGRESS,
BPF_CGROUP_INET_SOCK_CREATE,
BPF_CGROUP_SOCK_OPS,
BPF_CGROUP_SMAP_INGRESS,
__MAX_BPF_ATTACH_TYPE
};
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
enum bpf_sockmap_flags {
BPF_SOCKMAP_UNSPEC,
BPF_SOCKMAP_STRPARSER,
__MAX_BPF_SOCKMAP_FLAG
};
/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
* to the given target_fd cgroup the descendent cgroup will be able to
* override effective bpf program that was inherited from this cgroup
@ -210,6 +219,7 @@ union bpf_attr {
__u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type;
__u32 attach_flags;
__u32 attach_bpf_fd2;
};
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
@ -545,6 +555,23 @@ union bpf_attr {
* @mode: operation mode (enum bpf_adj_room_mode)
* @flags: reserved for future use
* Return: 0 on success or negative error code
*
* int bpf_sk_redirect_map(map, key, flags)
* Redirect skb to a sock in map using key as a lookup key for the
* sock in map.
* @map: pointer to sockmap
* @key: key to lookup sock in map
* @flags: reserved for future use
* Return: SK_REDIRECT
*
* int bpf_sock_map_update(skops, map, key, flags, map_flags)
* @skops: pointer to bpf_sock_ops
* @map: pointer to sockmap to update
* @key: key to insert/update sock in map
* @flags: same flags as map update elem
* @map_flags: sock map specific flags
* bit 1: Enable strparser
* other bits: reserved
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -598,7 +625,9 @@ union bpf_attr {
FN(set_hash), \
FN(setsockopt), \
FN(skb_adjust_room), \
FN(redirect_map),
FN(redirect_map), \
FN(sk_redirect_map), \
FN(sock_map_update),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@ -675,6 +704,15 @@ struct __sk_buff {
__u32 data;
__u32 data_end;
__u32 napi_id;
/* accessed by BPF_PROG_TYPE_sk_skb types */
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
__u32 local_ip4; /* Stored in network byte order */
__u32 remote_ip6[4]; /* Stored in network byte order */
__u32 local_ip6[4]; /* Stored in network byte order */
__u32 remote_port; /* Stored in network byte order */
__u32 local_port; /* stored in host byte order */
};
struct bpf_tunnel_key {
@ -734,6 +772,12 @@ struct xdp_md {
__u32 data_end;
};
enum sk_action {
SK_ABORTED = 0,
SK_DROP,
SK_REDIRECT,
};
#define BPF_TAG_SIZE 8
struct bpf_prog_info {

View file

@ -211,20 +211,28 @@ int bpf_obj_get(const char *pathname)
return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
}
int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
unsigned int flags)
int __bpf_prog_attach(int prog_fd1, int prog_fd2, int target_fd,
enum bpf_attach_type type,
unsigned int flags)
{
union bpf_attr attr;
bzero(&attr, sizeof(attr));
attr.target_fd = target_fd;
attr.attach_bpf_fd = prog_fd;
attr.attach_bpf_fd = prog_fd1;
attr.attach_bpf_fd2 = prog_fd2;
attr.attach_type = type;
attr.attach_flags = flags;
return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
}
int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
unsigned int flags)
{
return __bpf_prog_attach(prog_fd, 0, target_fd, type, flags);
}
int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
{
union bpf_attr attr;

View file

@ -50,6 +50,10 @@ int bpf_obj_pin(int fd, const char *pathname);
int bpf_obj_get(const char *pathname);
int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type,
unsigned int flags);
int __bpf_prog_attach(int prog1, int prog2,
int attachable_fd,
enum bpf_attach_type type,
unsigned int flags);
int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
void *data_out, __u32 *size_out, __u32 *retval,

View file

@ -65,6 +65,13 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
int optlen) =
(void *) BPF_FUNC_setsockopt;
static int (*bpf_sk_redirect_map)(void *map, int key, int flags) =
(void *) BPF_FUNC_sk_redirect_map;
static int (*bpf_sock_map_update)(void *map, void *key, void *value,
unsigned long long flags,
unsigned long long map_lags) =
(void *) BPF_FUNC_sock_map_update;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions