Make-initrd development discussion
 help / color / mirror / Atom feed
From: Alexey Gladkov <gladkov.alexey@gmail.com>
To: make-initrd@lists.altlinux.org
Subject: [make-initrd] [PATCH v1 07/11] feature/procacct: Add bpf helper
Date: Thu, 15 Jun 2023 19:59:16 +0200
Message-ID: <9df0a37215b485dd1f30ac624f512d55587f94f4.1686851829.git.gladkov.alexey@gmail.com> (raw)
In-Reply-To: <cover.1686851829.git.gladkov.alexey@gmail.com>

This bpf helper passes information about the arguments of the execve
system call to userspace. This is necessary because taskstats provides
no information about the process other than its comm. For shell scripts
and in some other cases, comm is not very informative.

Of course, with this approach a race condition is possible between an
event from the bpf program and a netlink event. If the netlink event for
a process is handled before its bpf event, a memory leak will occur
because nothing will remove the entry from the tree afterwards.

Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com>
---
 features/debug-procacct/src/Makefile.mk    |  17 ++-
 features/debug-procacct/src/procacct-bpf.c |  57 ++++++++
 features/debug-procacct/src/procacct.c     | 153 +++++++++++++++++++--
 features/debug-procacct/src/procacct.h     |  13 ++
 4 files changed, 224 insertions(+), 16 deletions(-)
 create mode 100644 features/debug-procacct/src/procacct-bpf.c
 create mode 100644 features/debug-procacct/src/procacct.h

diff --git a/features/debug-procacct/src/Makefile.mk b/features/debug-procacct/src/Makefile.mk
index ea02bf91..505d8288 100644
--- a/features/debug-procacct/src/Makefile.mk
+++ b/features/debug-procacct/src/Makefile.mk
@@ -1,6 +1,19 @@
 procacct_DEST = $(dest_data_bindir)/procacct
 procacct_SRCS = $(FEATURESDIR)/debug-procacct/src/procacct.c
-procacct_CFLAGS = -D_GNU_SOURCE -Idatasrc/libinitramfs
-procacct_LIBS = -L$(dest_data_libdir) -linitramfs
+procacct_CFLAGS = -D_GNU_SOURCE -Idatasrc/libinitramfs -DPROCACCT_BPF_FILE=\"/bin/procacct-bpf.o\"
+procacct_LIBS = -L$(dest_data_libdir) -linitramfs -lbpf
 
 PROGS += procacct
+
+procacct_bpf_DEST = $(dest_data_bindir)/procacct-bpf.o
+procacct_bpf_SRCS = $(FEATURESDIR)/debug-procacct/src/procacct-bpf.c
+
+CLANG ?= clang-15
+LLVM_STRIP = llvm-strip
+# Compile the bpf program with $(CLANG) for the bpf target, then run $(LLVM_STRIP) -g on the object.
+$(dest_data_bindir)/procacct-bpf.o: $(procacct_bpf_SRCS)
+	$(Q)mkdir -p -- $(dir $@)
+	$(call quiet_cmd,BPF,$@,$(CLANG)) -target bpf -g -O2 -Wall -Wextra -o $@ -c $<
+	$(call quiet_cmd,STRIP,$@,$(LLVM_STRIP)) -g $@
+
+build-progs: $(dest_data_bindir)/procacct-bpf.o
diff --git a/features/debug-procacct/src/procacct-bpf.c b/features/debug-procacct/src/procacct-bpf.c
new file mode 100644
index 00000000..2dbd25d8
--- /dev/null
+++ b/features/debug-procacct/src/procacct-bpf.c
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+
+#include "procacct.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct mm_struct { /* partial local declaration; field accesses are relocated via preserve_access_index */
+	unsigned long arg_start, arg_end, env_start, env_end;
+} __attribute__((preserve_access_index));
+
+struct task_struct { /* partial local declaration; field accesses are relocated via preserve_access_index */
+	pid_t pid;
+	struct mm_struct *mm; /* used below to reach arg_start / arg_end */
+} __attribute__((preserve_access_index));
+
+struct { /* ring buffer map carrying struct task_cmdline records */
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	__uint(max_entries, 256 * sizeof(struct task_cmdline)); /* NOTE(review): not a power of two; presumably relies on libbpf rounding the size up — confirm */
+} ringbuf SEC(".maps");
+
+SEC("raw_tracepoint/sched_process_exec")
+int procacct_process_exec(struct bpf_raw_tracepoint_args *ctx __attribute__((unused)))
+{
+	struct task_cmdline *buf;
+	/* Publish the pid and the argument area (arg_start..arg_end) of the current task. */
+	struct task_struct *task = (void *) bpf_get_current_task_btf();
+	char *arg_start = (char *) BPF_CORE_READ(task, mm, arg_start);
+	char *arg_end   = (char *) BPF_CORE_READ(task, mm, arg_end);
+	size_t len = arg_end - arg_start;
+	/* Reserve one fixed-size record; if that fails this event is not reported. */
+	buf = bpf_ringbuf_reserve(&ringbuf, sizeof(*buf), 0);
+	if (!buf)
+		return 0;
+	/* Clamp the copy length to the size of the cmdline field. */
+	if (len >= sizeof(buf->cmdline))
+		len = sizeof(buf->cmdline);
+	/* Copy the argument bytes from user memory; discard the record if the read fails. */
+	if (bpf_probe_read_user(buf->cmdline, len, arg_start)) {
+		bpf_ringbuf_discard(buf, 0);
+		return 0;
+	}
+
+	buf->pid = BPF_CORE_READ(task, pid);
+	buf->cmdline_len = len; /* number of bytes actually copied into cmdline */
+
+	bpf_ringbuf_submit(buf, 0);
+	return 0;
+}
diff --git a/features/debug-procacct/src/procacct.c b/features/debug-procacct/src/procacct.c
index 151a6f2c..51c81734 100644
--- a/features/debug-procacct/src/procacct.c
+++ b/features/debug-procacct/src/procacct.c
@@ -27,6 +27,9 @@
 #include <linux/taskstats.h>
 #include <linux/kdev_t.h>
 
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/socket.h>
@@ -46,6 +49,8 @@
 #include "rd/logging.h"
 #include "rd/memory.h"
 
+#include "procacct.h"
+
 /* Maximum size of response requested or message sent */
 #define MAX_MSG_SIZE 2048
 
@@ -73,20 +78,27 @@ struct msgtemplate {
 	char buf[MAX_MSG_SIZE];
 };
 
-static void *proc_root = NULL;
-
-struct proc_cmdline {
+struct cmdline {
 	pid_t pid;
 	char *cmdline;
 };
 
+static void *tree_root = NULL;
+
 struct ctx_netlink {
 	uint16_t cpuid;
 	uint16_t cpumask_len;
 	char cpumask[100 + 6 * MAX_CPUS];
 };
 
+struct ctx_bpf { /* libbpf handles stored in fd_handler.data by setup_bpf_fd() */
+	struct ring_buffer *ringbuf; /* consumer created for the "ringbuf" map */
+	struct bpf_object *file; /* object opened from PROCACCT_BPF_FILE */
+	struct bpf_link *link; /* result of attaching the bpf program */
+};
+
 enum {
+	FD_BPF,
 	FD_NETLINK,
 	FD_MAX,
 };
@@ -107,11 +119,15 @@ struct fd_handler {
 static struct fd_handler fd_handler_list[FD_MAX];
 
 static void usage(void)                                                       __attribute__((noreturn));
-static int proc_compare(const void *a, const void *b)                         __attribute__((nonnull(1, 2)));
+static int tree_compare(const void *a, const void *b)                         __attribute__((nonnull(1, 2)));
 static int process_netlink_events(struct fd_handler *el);
 static int prepare_netlink(struct fd_handler *el);
 static int finish_netlink(struct fd_handler *el);
 static void setup_netlink_fd(struct fd_handler *el);
+static int process_ringbuf_event(void *ctx, void *data, size_t data_sz);
+static int process_bpf_events(struct fd_handler *el);
+static int finish_bpf(struct fd_handler *el);
+static void setup_bpf_fd(struct fd_handler *el);
 static ssize_t send_cmd(int fd, uint16_t nlmsg_type, uint8_t genl_cmd,
                         uint16_t nla_type, void *nla_data, uint16_t nla_len);
 static uint16_t get_family_id(int fd);
@@ -119,10 +135,10 @@ static void print_procacct(int fd, struct taskstats *t)                       __
 static void handle_aggr(struct nlattr *na, int fd)                            __attribute__((nonnull(1)));
 static int setup_epoll_fd(struct fd_handler list[FD_MAX]);
 
-int proc_compare(const void *a, const void *b)
+int tree_compare(const void *a, const void *b)
 {
-	pid_t pid_a = ((struct proc_cmdline *)a)->pid;
-	pid_t pid_b = ((struct proc_cmdline *)b)->pid;
+	pid_t pid_a = ((struct cmdline *)a)->pid;
+	pid_t pid_b = ((struct cmdline *)b)->pid;
 
 	if (pid_a < pid_b)
 		return -1;
@@ -243,8 +259,10 @@ uint16_t get_family_id(int fd)
 
 void print_procacct(int fd, struct taskstats *t)
 {
-	struct proc_cmdline key = { .pid = (pid_t) t->ac_pid };
-	struct proc_cmdline *proc = tfind(&key, proc_root, proc_compare);
+	struct cmdline *proc, key = { .pid = (pid_t) t->ac_pid };
+	void **val = tfind(&key, &tree_root, tree_compare);
+
+	proc = (val ? *val : NULL);
 
 	dprintf(fd,
 	        "%c\t%u\t%u\t%u\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t%llu\t[%s]\t%s\n",
@@ -263,8 +281,14 @@ void print_procacct(int fd, struct taskstats *t)
 	        (t->read_bytes),                     // bytes of read I/O
 	        (t->write_bytes),                    // bytes of write I/O
 	        (t->ac_comm),                        // comm
-	        (proc ? proc->cmdline : "")          // cmdline
+	        (proc ? proc->cmdline : "-")         // cmdline
 	       );
+
+	if (proc) {
+		tdelete(proc, &tree_root, tree_compare);
+		free(proc->cmdline);
+		free(proc);
+	}
 }
 
 void handle_aggr(struct nlattr *na, int fd)
@@ -317,14 +341,14 @@ int prepare_netlink(struct fd_handler *el)
 
 	ctx->cpuid = get_family_id(el->fd);
 	if (!ctx->cpuid) {
-		rd_err("error getting family id, errno=%d", errno);
+		rd_err("error getting family id (errno=%d): %m", errno);
 		free(ctx);
 		return -1;
 	}
 
 	ret = send_cmd(el->fd, ctx->cpuid, TASKSTATS_CMD_GET,
 	               TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
-		       &ctx->cpumask, ctx->cpumask_len);
+	               &ctx->cpumask, ctx->cpumask_len);
 	if (ret < 0) {
 		rd_err("error sending register cpumask");
 		free(ctx);
@@ -343,7 +367,7 @@ int finish_netlink(struct fd_handler *el)
 
 	ret = send_cmd(el->fd, ctx->cpuid, TASKSTATS_CMD_GET,
 	               TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
-		       &ctx->cpumask, ctx->cpumask_len);
+	               &ctx->cpumask, ctx->cpumask_len);
 	if (ret < 0) {
 		rd_err("error sending deregister cpumask");
 		return -1;
@@ -367,7 +391,7 @@ int process_netlink_events(struct fd_handler *el)
 				return 0;
 			if (errno == EINTR)
 				continue;
-			rd_fatal("nonfatal reply error: %m");
+			rd_fatal("fatal reply error: %m");
 		}
 
 		if (msg.n.nlmsg_type == NLMSG_ERROR || !NLMSG_OK((&msg.n), ret)) {
@@ -401,6 +425,105 @@ int process_netlink_events(struct fd_handler *el)
 	return 0;
 }
 
+/*
+ * Ring buffer callback: store the command line reported by the bpf program
+ * for task->pid in the tree.  NUL and newline bytes become spaces and the
+ * copy is NUL-terminated: print_procacct() prints it as a string.  Returns 0.
+ */
+int process_ringbuf_event(void *ctx __attribute__((unused)),
+                          void *data,
+                          size_t data_sz  __attribute__((unused)))
+{
+	struct cmdline *new, **val;
+	struct task_cmdline *task = data;
+	size_t len = task->cmdline_len;
+
+	if (len > sizeof(task->cmdline)) /* never read past the record buffer */
+		len = sizeof(task->cmdline);
+
+	new = rd_malloc_or_die(sizeof(*new));
+	new->pid = task->pid;
+	new->cmdline = rd_calloc_or_die(len + 1, sizeof(char));
+	for (size_t i = 0; i < len; i++) {
+		char c = task->cmdline[i];
+		new->cmdline[i] = (c == '\n' || c == '\0') ? ' ' : c;
+	}
+	new->cmdline[len] = '\0';
+
+	val = tsearch(new, &tree_root, tree_compare);
+	if (val && *val == new)
+		return 0; /* the tree now references the new node */
+	if (!val) {
+		rd_err("unable to add info about pid=%u", task->pid);
+		free(new->cmdline); /* do not leak the rejected copy */
+	} else {
+		/* pid already in the tree: replace its command line */
+		free((*val)->cmdline);
+		(*val)->cmdline = new->cmdline;
+	}
+	free(new);
+	return 0;
+}
+
+/* Poll the bpf ring buffer, retrying on EINTR; always returns 0. */
+int process_bpf_events(struct fd_handler *el)
+{
+	struct ctx_bpf *ctx = el->data;
+	do {
+		errno = 0;
+		if (ring_buffer__poll(ctx->ringbuf, -1) >= 0)
+			return 0;
+	} while (errno == EINTR);
+	rd_err("error polling ring buffer: %m");
+	return 0;
+}
+
+/* Load and attach the bpf program and bind its ring buffer to el; errors go to rd_fatal(). */
+void setup_bpf_fd(struct fd_handler *el)
+{
+	struct bpf_program *prog;
+	struct ctx_bpf *ctx = rd_calloc_or_die(1, sizeof(*ctx));
+	int map_fd;
+
+	ctx->file = bpf_object__open(PROCACCT_BPF_FILE);
+	if (libbpf_get_error(ctx->file))
+		rd_fatal("opening BPF object file failed");
+	if (bpf_object__load(ctx->file))
+		rd_fatal("loading BPF object file failed");
+
+	prog = bpf_object__find_program_by_name(ctx->file, "procacct_process_exec");
+	if (!prog)
+		rd_fatal("finding a procacct_process_exec in the file object failed");
+
+	map_fd = bpf_object__find_map_fd_by_name(ctx->file, "ringbuf");
+	if (map_fd < 0)
+		rd_fatal("finding a ringbuf in the file object failed");
+
+	ctx->ringbuf = ring_buffer__new(map_fd, process_ringbuf_event, NULL, NULL);
+	if (libbpf_get_error(ctx->ringbuf)) /* was unchecked and dereferenced below */
+		rd_fatal("creating the ring buffer failed");
+
+	ctx->link = bpf_program__attach(prog);
+	if (libbpf_get_error(ctx->link))
+		rd_fatal("bpf attach failed");
+
+	el->fd = ring_buffer__epoll_fd(ctx->ringbuf);
+	el->data = ctx;
+	el->fd_handler = process_bpf_events;
+	el->fd_finish = finish_bpf;
+}
+
+/* Release the libbpf objects that setup_bpf_fd() stored in el->data; always returns 0. */
+int finish_bpf(struct fd_handler *el)
+{
+	struct ctx_bpf *bpf = el->data;
+
+	ring_buffer__free(bpf->ringbuf);
+	bpf_link__destroy(bpf->link);
+	bpf_object__close(bpf->file);
+	return 0;
+}
+
 int setup_epoll_fd(struct fd_handler list[FD_MAX])
 {
 	int epollfd;
@@ -454,6 +577,8 @@ int main(int argc, char *argv[])
 	current_pid = getpid();
 
 	setup_netlink_fd(&fd_handler_list[FD_NETLINK]);
+	setup_bpf_fd(&fd_handler_list[FD_BPF]);
+
 	fd_epoll = setup_epoll_fd(fd_handler_list);
 
 	for (int i = 0; i < FD_MAX; i++) {
diff --git a/features/debug-procacct/src/procacct.h b/features/debug-procacct/src/procacct.h
new file mode 100644
index 00000000..cd783bc2
--- /dev/null
+++ b/features/debug-procacct/src/procacct.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef __PROCACCT_H__
+#define __PROCACCT_H__
+
+#define MAX_ARGS_LEN 4096
+
+struct task_cmdline { /* record written by the bpf program and read by procacct */
+	pid_t pid; /* pid of the task the record describes */
+	size_t cmdline_len; /* valid bytes in cmdline; NOTE(review): size_t width must match between the bpf and userspace builds — confirm */
+	char cmdline[MAX_ARGS_LEN]; /* raw argument bytes, not guaranteed to end with NUL */
+};
+
+#endif /* __PROCACCT_H__ */
-- 
2.33.8



  parent reply	other threads:[~2023-06-15 17:59 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-15 17:59 [make-initrd] [PATCH v1 00/11] Add accounting feature Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 01/11] feature/procacct: New feature to debug initramfs Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 02/11] feature/procacct: Use epoll Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 03/11] feature/procacct: Use default rcvbufsz Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 04/11] feature/procacct: Track more values Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 05/11] feature/procacct: Use msgtemplate instead of custom struct Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 06/11] feature/procacct: Use nonblocking per-call Alexey Gladkov
2023-06-15 17:59 ` Alexey Gladkov [this message]
2023-06-15 17:59 ` [make-initrd] [PATCH v1 08/11] feature/procacct: Add accounting report Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 09/11] feature/procacct: Wait until procacct is initialized Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 10/11] feature/procacct: Make procacct optional Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 11/11] feature/procacct: Add to testing Alexey Gladkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9df0a37215b485dd1f30ac624f512d55587f94f4.1686851829.git.gladkov.alexey@gmail.com \
    --to=gladkov.alexey@gmail.com \
    --cc=make-initrd@lists.altlinux.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Make-initrd development discussion

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://lore.altlinux.org/make-initrd/0 make-initrd/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 make-initrd make-initrd/ http://lore.altlinux.org/make-initrd \
		make-initrd@lists.altlinux.org make-initrd@lists.altlinux.ru make-initrd@lists.altlinux.com
	public-inbox-index make-initrd

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://lore.altlinux.org/org.altlinux.lists.make-initrd


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git