[make-initrd] [PATCH v1 01/11] feature/procacct: New feature to debug initramfs

Make-initrd development discussion
 help / color / mirror / Atom feed

From: Alexey Gladkov <gladkov.alexey@gmail.com>
To: make-initrd@lists.altlinux.org
Subject: [make-initrd] [PATCH v1 01/11] feature/procacct: New feature to debug initramfs
Date: Thu, 15 Jun 2023 19:59:10 +0200
Message-ID: <e283888669a329cd7a518bd02cd2cfffd6478989.1686851829.git.gladkov.alexey@gmail.com> (raw)
In-Reply-To: <cover.1686851829.git.gladkov.alexey@gmail.com>

Based on the process accounting example from the Linux kernel sources:
tools/accounting/procacct.c

Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com>
---
 features/debug-procacct/README.md       |   3 +
 features/debug-procacct/config.mk       |   1 +
 features/debug-procacct/rules.mk        |   1 +
 features/debug-procacct/src/Makefile.mk |   6 +
 features/debug-procacct/src/procacct.c  | 395 ++++++++++++++++++++++++
 5 files changed, 406 insertions(+)
 create mode 100644 features/debug-procacct/README.md
 create mode 100644 features/debug-procacct/config.mk
 create mode 100644 features/debug-procacct/rules.mk
 create mode 100644 features/debug-procacct/src/Makefile.mk
 create mode 100644 features/debug-procacct/src/procacct.c

diff --git a/features/debug-procacct/README.md b/features/debug-procacct/README.md
new file mode 100644
index 00000000..59e39a61
--- /dev/null
+++ b/features/debug-procacct/README.md
@@ -0,0 +1,3 @@
+# debug-procacct
+
+This feature helps debug the boot process inside the initramfs by logging accounting data for every task that exits.
diff --git a/features/debug-procacct/config.mk b/features/debug-procacct/config.mk
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/features/debug-procacct/config.mk
@@ -0,0 +1 @@
+
diff --git a/features/debug-procacct/rules.mk b/features/debug-procacct/rules.mk
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/features/debug-procacct/rules.mk
@@ -0,0 +1 @@
+
diff --git a/features/debug-procacct/src/Makefile.mk b/features/debug-procacct/src/Makefile.mk
new file mode 100644
index 00000000..ea02bf91
--- /dev/null
+++ b/features/debug-procacct/src/Makefile.mk
@@ -0,0 +1,6 @@
+procacct_DEST = $(dest_data_bindir)/procacct
+procacct_SRCS = $(FEATURESDIR)/debug-procacct/src/procacct.c
+procacct_CFLAGS = -D_GNU_SOURCE -Idatasrc/libinitramfs
+procacct_LIBS = -L$(dest_data_libdir) -linitramfs
+
+PROGS += procacct
diff --git a/features/debug-procacct/src/procacct.c b/features/debug-procacct/src/procacct.c
new file mode 100644
index 00000000..9302edb0
--- /dev/null
+++ b/features/debug-procacct/src/procacct.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0
+/* procacct.c
+ *
+ * Demonstrator of fetching resource data on task exit, as a way
+ * to accumulate accurate program resource usage statistics, without
+ * prior identification of the programs. For that, the fields for
+ * device and inode of the program executable binary file are also
+ * extracted in addition to the command string.
+ *
+ * The TGID together with the PID and the AGROUP flag allow
+ * identification of threads in a process and single-threaded processes.
+ * The ac_tgetime field gives proper whole-process walltime.
+ *
+ * Written (changed) by Thomas Orgis, University of Hamburg in 2022
+ *
+ * This is a cheap derivation (inheriting the style) of getdelays.c:
+ *
+ * Utility to get per-pid and per-tgid delay accounting statistics
+ * Also illustrates usage of the taskstats interface
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2005
+ * Copyright (C) Balbir Singh, IBM Corp. 2006
+ * Copyright (c) Jay Lan, SGI. 2006
+ */
+#include <linux/genetlink.h>
+#include <linux/acct.h>
+#include <linux/taskstats.h>
+#include <linux/kdev_t.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <errno.h>
+#include <limits.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <search.h>
+
+#include "rd/logging.h"
+
+/* Maximum size of response requested or message sent */
+#define MAX_MSG_SIZE 1024
+
+/* Maximum number of cpus expected to be specified in a cpumask */
+#define MAX_CPUS 64
+
+/*
+ * Generic macros for dealing with netlink sockets. Might be duplicated
+ * elsewhere. Commercial grade applications should use libnl or libnetlink
+ * instead. Offsets are computed via char * (no void * arithmetic).
+ */
+#define GENLMSG_DATA(glh)	((void *)((char *)NLMSG_DATA(glh) + GENL_HDRLEN))
+#define GENLMSG_PAYLOAD(glh)	(NLMSG_PAYLOAD((glh), 0) - GENL_HDRLEN)
+#define NLA_DATA(na)		((void *)((char *)(na) + NLA_HDRLEN))
+#define NLA_PAYLOAD(len)	((len) - NLA_HDRLEN)
+
+static pid_t current_pid;	/* set from getpid() in main(); sent as nlmsg_pid by send_cmd() */
+static int rcvbufsz;		/* SO_RCVBUF size from -r; 0 leaves the socket default untouched */
+static char name[100];		/* scratch copy of the family name built in get_family_id() */
+
+struct msgtemplate {		/* one generic netlink message: both headers plus payload */
+	struct nlmsghdr n;
+	struct genlmsghdr g;
+	char buf[MAX_MSG_SIZE];
+};
+
+static void *proc_root = NULL;	/* tree root looked up with tfind() in print_procacct() */
+
+struct proc_cmdline {		/* tree entry; proc_compare() orders entries by pid */
+	pid_t pid;
+	char *cmdline;
+};
+/* Forward declarations; they carry the noreturn/nonnull attributes of the definitions below. */
+static void usage(void)                                                       __attribute__((noreturn));
+static int proc_compare(const void *a, const void *b)                         __attribute__((nonnull(1, 2)));
+static int create_nl_socket(int protocol);
+static ssize_t send_cmd(int fd, uint16_t nlmsg_type, uint8_t genl_cmd,
+                        uint16_t nla_type, void *nla_data, uint16_t nla_len);
+static uint16_t get_family_id(int fd);
+static void print_procacct(int fd, struct taskstats *t)                       __attribute__((nonnull(2)));
+static void handle_aggr(struct nlattr *na, int fd)                            __attribute__((nonnull(1)));
+
+/* Comparator for the pid tree: orders two struct proc_cmdline entries by pid. */
+int proc_compare(const void *a, const void *b)
+{
+	const struct proc_cmdline *lhs = a;
+	const struct proc_cmdline *rhs = b;
+
+	if (lhs->pid != rhs->pid)
+		return (lhs->pid < rhs->pid) ? -1 : 1;
+
+	return 0;
+}
+
+void usage(void) /* print the option summary to stderr, then exit with status 1 */
+{
+	fputs("procacct [-o logfile] [-r bufsize] [-m cpumask]\n", stderr);
+	exit(1);
+}
+
+/*
+ * Create a raw netlink socket, apply rcvbufsz if set, and bind it.
+ * Returns the fd, or -1 with errno left at the failing call's value.
+ */
+int create_nl_socket(int protocol)
+{
+	struct sockaddr_nl local = { .nl_family = AF_NETLINK };
+	int saved_errno;
+
+	int fd = socket(AF_NETLINK, SOCK_RAW, protocol);
+	if (fd < 0)
+		return -1;
+
+	if (rcvbufsz && setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbufsz, sizeof(rcvbufsz)) < 0) {
+		saved_errno = errno;
+		rd_err("unable to set socket rcv buf size to %d", rcvbufsz);
+		goto error;
+	}
+
+	if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) {
+		saved_errno = errno;
+		rd_err("unable bind to socket");
+		goto error;
+	}
+
+	return fd;
+error:
+	close(fd);
+	/* rd_err() and close() may clobber errno; restore the real cause. */
+	errno = saved_errno;
+	return -1;
+}
+
+/* Send one genl request carrying a single attribute; returns 0, or -1 with errno set. */
+ssize_t send_cmd(int fd, uint16_t nlmsg_type, uint8_t genl_cmd, uint16_t nla_type, void *nla_data, uint16_t nla_len)
+{
+	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+	struct msgtemplate msg;
+	struct nlattr *na;
+
+	/* The attribute (header + data + 1 trailing byte) must fit into msg.buf. */
+	if ((size_t) NLA_HDRLEN + nla_len + 1 > sizeof(msg.buf)) {
+		errno = EMSGSIZE;
+		return -1;
+	}
+
+	/* Zero everything so reserved fields and padding never carry stack data. */
+	memset(&msg, 0, sizeof(msg));
+
+	msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+	msg.n.nlmsg_type = nlmsg_type;
+	msg.n.nlmsg_flags = NLM_F_REQUEST;
+	msg.n.nlmsg_pid = (uint32_t) current_pid;
+	msg.g.cmd = genl_cmd;
+	msg.g.version = 0x1;
+
+	na = (struct nlattr *) GENLMSG_DATA(&msg);
+	na->nla_type = nla_type;
+	na->nla_len = (uint16_t) (nla_len + 1 + NLA_HDRLEN);
+	memcpy(NLA_DATA(na), nla_data, nla_len);
+	msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
+
+	char *buf = (char *) &msg;
+	ssize_t buflen = msg.n.nlmsg_len;
+	ssize_t ret;
+
+	while ((ret = sendto(fd, buf, (size_t) buflen, 0, (struct sockaddr *) &nladdr, sizeof(nladdr))) < buflen) {
+		if (ret > 0) {
+			buf += ret;
+			buflen -= ret;
+		} else if (errno != EINTR && errno != EAGAIN) {
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+ * Probe the controller in genetlink to find the family id
+ * for the TASKSTATS family. Returns 0 when the id cannot be obtained.
+ */
+uint16_t get_family_id(int fd)
+{
+	struct {
+		struct nlmsghdr n;
+		struct genlmsghdr g;
+		char buf[256];
+	} ans;
+
+	strcpy(name, TASKSTATS_GENL_NAME);
+
+	ssize_t ret = send_cmd(fd, GENL_ID_CTRL, CTRL_CMD_GETFAMILY,
+	                       CTRL_ATTR_FAMILY_NAME, name, strlen(TASKSTATS_GENL_NAME) + 1);
+	if (ret < 0)
+		return 0;
+
+	ret = recv(fd, &ans, sizeof(ans), 0);
+
+	/* Check the recv() result first: on failure ans holds no valid data. */
+	if (ret < 0 || !NLMSG_OK((&ans.n), ret) || ans.n.nlmsg_type == NLMSG_ERROR)
+		return 0;
+
+	/* Skip the first attribute; the family id is expected right after it. */
+	struct nlattr *na = (struct nlattr *) GENLMSG_DATA(&ans);
+	na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
+
+	uint16_t id = 0;
+
+	if (na->nla_type == CTRL_ATTR_FAMILY_ID)
+		memcpy(&id, NLA_DATA(na), sizeof(id));
+
+	return id;
+}
+
+/* Write one tab-separated accounting record for task @t to @fd. */
+void print_procacct(int fd, struct taskstats *t)
+{
+	struct proc_cmdline key = { .pid = (pid_t) t->ac_pid };
+	/* tfind() takes the root's address and returns a pointer to the stored item pointer. */
+	struct proc_cmdline **proc = tfind(&key, &proc_root, proc_compare);
+	dprintf(fd,
+	        "%c\t%u\t%u\t%llu\t%llu\t%llu\t%llu\t %llu\t[%s]\t%s\n",
+	        // First letter: P is the last task in a group, T a mere thread, ? unknown.
+	        (t->version >= 12 ? (t->ac_flag & AGROUP ? 'P' : 'T') : '?'),
+	        (unsigned int) t->ac_pid,                           // pid
+	        (unsigned int) (t->version >= 12 ? t->ac_tgid : 0), // tgid
+	        (unsigned long long) t->ac_btime64,                 // btime
+	        (unsigned long long) t->ac_etime,                   // wall
+	        (unsigned long long) (t->ac_utime + t->ac_stime),   // cputime
+	        (unsigned long long) t->hiwater_vm,                 // vmusage
+	        (unsigned long long) t->hiwater_rss,                // rssusage
+	        t->ac_comm,                                         // comm
+	        (proc ? (*proc)->cmdline : ""));                    // cmdline
+}
+
+/* Print every TASKSTATS_TYPE_STATS entry nested in a per-pid aggregate @na to @fd. */
+void handle_aggr(struct nlattr *na, int fd)
+{
+	int nla_type = na->nla_type;
+	int aggr_len = NLA_PAYLOAD(na->nla_len);
+	int len2 = 0;
+	na = (struct nlattr *) NLA_DATA(na);
+	/* Stop on a malformed attribute: too short would never advance, too long overruns. */
+	while (len2 < aggr_len && na->nla_len >= NLA_HDRLEN && na->nla_len <= aggr_len - len2) {
+		switch (na->nla_type) {
+			case TASKSTATS_TYPE_PID:
+			case TASKSTATS_TYPE_TGID:
+			case TASKSTATS_TYPE_NULL:
+				break;
+			case TASKSTATS_TYPE_STATS:
+				if (nla_type == TASKSTATS_TYPE_AGGR_PID)
+					print_procacct(fd, (struct taskstats *) NLA_DATA(na));
+				break;
+			default:
+				rd_err("unknown nested nla_type %d", na->nla_type);
+				break;
+		}
+		len2 += NLA_ALIGN(na->nla_len);
+		na = (struct nlattr *)((char *) na + NLA_ALIGN(na->nla_len));
+	}
+}
+
+/* Entry point: register a taskstats exit listener and log one record per exited task. */
+int main(int argc, char *argv[])
+{
+	ssize_t ret;
+	uint16_t id, cpumask_len;
+	char cpumask[100 + 6 * MAX_CPUS];
+	const char *logfile = NULL;
+
+	int rc = EXIT_FAILURE;
+	int fd_nlink = -1;
+	int fd_out = STDOUT_FILENO;
+	int maskset = 0;
+
+	while (1) {
+		int c = getopt(argc, argv, "m:o:r:");
+		if (c < 0)
+			break;
+
+		switch (c) {
+			case 'o':
+				logfile = optarg; /* points into argv, stays valid */
+				break;
+			case 'r':
+				rcvbufsz = atoi(optarg);
+				if (rcvbufsz < 0)
+					rd_fatal("invalid rcv buf size");
+				break;
+			case 'm':
+				strlcpy(cpumask, optarg, sizeof(cpumask));
+				maskset = 1;
+				break;
+			default:
+				usage();
+		}
+	}
+	if (!maskset) {
+		/* Default to all online CPUs; a lone CPU is number 0, not 1. */
+		long np = sysconf(_SC_NPROCESSORS_ONLN);
+		if (np > 1)
+			snprintf(cpumask, sizeof(cpumask), "0-%ld", np - 1);
+		else
+			snprintf(cpumask, sizeof(cpumask), "0");
+	}
+
+	if ((strlen(cpumask) + 1) > USHRT_MAX)
+		rd_fatal("cpumask too long");
+
+	cpumask_len = (uint16_t) (strlen(cpumask) + 1);
+
+	if (logfile) {
+		fd_out = open(logfile, O_WRONLY | O_CREAT | O_TRUNC | O_SYNC, 0644);
+		if (fd_out < 0)
+			rd_fatal("cannot open output file: %s: %m", logfile);
+	}
+
+	fd_nlink = create_nl_socket(NETLINK_GENERIC);
+	if (fd_nlink < 0)
+		rd_fatal("error creating Netlink socket: %m");
+
+	current_pid = getpid();
+
+	id = get_family_id(fd_nlink);
+	if (!id) {
+		rd_err("error getting family id, errno=%d", errno);
+		goto err;
+	}
+
+	ret = send_cmd(fd_nlink, id, TASKSTATS_CMD_GET,
+	               TASKSTATS_CMD_ATTR_REGISTER_CPUMASK, &cpumask, cpumask_len);
+	if (ret < 0) {
+		rd_err("error sending register cpumask");
+		goto err;
+	}
+
+	while (1) {
+		struct msgtemplate msg;
+		struct nlattr *na;
+
+		ret = recv(fd_nlink, &msg, sizeof(msg), 0);
+		if (ret < 0) {
+			rd_err("nonfatal reply error: errno=%d", errno);
+			continue;
+		}
+
+		if (msg.n.nlmsg_type == NLMSG_ERROR || !NLMSG_OK((&msg.n), ret)) {
+			struct nlmsgerr *err = NLMSG_DATA(&msg);
+			rd_fatal("fatal reply error, errno=%d", err->error);
+		}
+
+		ret = GENLMSG_PAYLOAD(&msg.n);
+		na = (struct nlattr *) GENLMSG_DATA(&msg);
+
+		ssize_t len = 0;
+		/* A too-short nla_len would never advance: drop the rest of the message. */
+		while (len < ret && na->nla_len >= NLA_HDRLEN) {
+			switch (na->nla_type) {
+				case TASKSTATS_TYPE_NULL:
+					break;
+				case TASKSTATS_TYPE_AGGR_PID:
+				case TASKSTATS_TYPE_AGGR_TGID:
+					/* For nested attributes, na follows */
+					handle_aggr(na, fd_out);
+					break;
+				default:
+					rd_err("unexpected nla_type %d", na->nla_type);
+			}
+
+			len += NLA_ALIGN(na->nla_len);
+			na = (struct nlattr *) ((char *) GENLMSG_DATA(&msg) + len);
+		}
+	}
+
+	/* Not reached while the receive loop above has no exit condition. */
+	ret = send_cmd(fd_nlink, id, TASKSTATS_CMD_GET,
+	               TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK, &cpumask, cpumask_len);
+	if (ret < 0)
+		rd_fatal("error sending deregister cpumask");
+	rc = EXIT_SUCCESS;
+err:
+	close(fd_nlink);
+	if (fd_out != STDOUT_FILENO)
+		close(fd_out);
+
+	return rc;
+}
-- 
2.33.8

next prev parent reply	other threads:[~2023-06-15 17:59 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-15 17:59 [make-initrd] [PATCH v1 00/11] Add accounting feature Alexey Gladkov
2023-06-15 17:59 ` Alexey Gladkov [this message]
2023-06-15 17:59 ` [make-initrd] [PATCH v1 02/11] feature/procacct: Use epoll Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 03/11] feature/procacct: Use default rcvbufsz Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 04/11] feature/procacct: Track more values Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 05/11] feature/procacct: Use msgtemplate instead of custom struct Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 06/11] feature/procacct: Use nonblocking per-call Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 07/11] feature/procacct: Add bpf helper Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 08/11] feature/procacct: Add accounting report Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 09/11] feature/procacct: Wait until procacct is initialized Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 10/11] feature/procacct: Make procacct optional Alexey Gladkov
2023-06-15 17:59 ` [make-initrd] [PATCH v1 11/11] feature/procacct: Add to testing Alexey Gladkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e283888669a329cd7a518bd02cd2cfffd6478989.1686851829.git.gladkov.alexey@gmail.com \
    --to=gladkov.alexey@gmail.com \
    --cc=make-initrd@lists.altlinux.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Make-initrd development discussion

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://lore.altlinux.org/make-initrd/0 make-initrd/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 make-initrd make-initrd/ http://lore.altlinux.org/make-initrd \
		make-initrd@lists.altlinux.org make-initrd@lists.altlinux.ru make-initrd@lists.altlinux.com
	public-inbox-index make-initrd

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://lore.altlinux.org/org.altlinux.lists.make-initrd


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git