Linux Kprobe
zhilu.zhang
zhilu.zhang
发布于 2023-07-04 / 32 阅读 / 0 评论 / 0 点赞

Linux Kprobe

Linux Kprobe

一、overview

kprobe是Linux内核自带的调试机制,可以在内核的函数执行前后添加钩子来执行自定义函数来达到修改,调试内核的目的。
具体简介可以参考内核官方文档,这个文档已经有比较详细的记录.
https://www.kernel.org/doc/html/latest/trace/kprobes.html

struct kprobe

struct kprobe {
    struct list_head list;                  // 在内核的 kprobe 列表中链接的链表指针
    kprobe_opcode_t *addr;                   // 目标函数的地址指针
    kprobe_pre_handler_t pre_handler;        // 目标函数执行前的预处理函数
    kprobe_post_handler_t post_handler;      // 目标函数执行后的后处理函数
    kprobe_fault_handler_t fault_handler;    // 目标函数故障处理函数
    const char *symbol_name;                 // 目标函数的符号名称
    struct kretprobe *rp;                    // 与此 kprobe 关联的 kretprobe 结构体指针(可选)
    // 其他字段可以根据需要进行设置
};

struct kretprobe

struct kretprobe {
	struct kprobe kp;		/* 内核探测点的结构体 */
	kretprobe_handler_t handler;	/* 返回函数跟踪处理程序 */
	kretprobe_entry_handler_t entry_handler; /* 返回函数入口跟踪处理程序 */
	kretprobe_reentry_handler_t reentry_handler; /* 返回函数重新进入跟踪处理程序 */
	kretprobe_fault_handler_t fault_handler; /* 出现故障时的处理程序 */
	kretprobe_break_handler_t break_handler; /* 断点跟踪处理程序 */
	kretprobe_maxactive_t maxactive;	/* 最大活动实例数 */
	unsigned int flags;		/* 标志位 */
	struct hlist_node hlist;	/* 结构体链接到全局散列表 */
};

struct jprobe

/*
 * Descriptor of a jumper probe, as declared in include/linux/kprobes.h of
 * older kernels that still provide the jprobe API.
 *
 * entry is an untyped pointer because the proxy routine must have the same
 * argument list as the probed function, which differs from probe to probe.
 */
struct jprobe {
    struct kprobe kp;   /* Embedded kprobe describing the function to trace */
    void *entry;        /* Probe handling code to jump to when the function is called */
};

二、配置宏

使用kprobe动态调试技术需要打开:

CONFIG_KPROBES=y

官方有一个简单基础的演示示例kernel/samples/kprobes,打开控制宏:

CONFIG_SAMPLES=y
CONFIG_SAMPLE_KPROBES=y
CONFIG_SAMPLE_KRETPROBES=y

三、示例使用方法

kprobe

示例函数检测的函数是kernel_clone

static char symbol[MAX_SYMBOL_LEN] = "kernel_clone";
module_param_string(symbol, symbol, sizeof(symbol), 0644);

默认探测的符号是kernel_clone,加载模块时可以通过symbol参数把探测目标改为meminfo_proc_show函数。

示范

root@zzl:/system/lib/modules/5.10.59/kernel/samples/kprobes# insmod kprobe_example.ko symbol=meminfo_proc_show
root@zzl:/system/lib/modules/5.10.59/kernel/samples/kprobes# cat /proc/meminfo 
MemTotal:        4644992 kB
MemFree:         2211528 kB
MemAvailable:    2085916 kB
Buffers:           18068 kB
Cached:            36768 kB
SwapCached:            0 kB
Active:            19988 kB
Inactive:        2161392 kB
Active(anon):        128 kB
Inactive(anon):  2143608 kB
Active(file):      19860 kB
Inactive(file):    17784 kB
Unevictable:       17140 kB
Mlocked:               0 kB
SwapTotal:             0 kB
SwapFree:              0 kB
Dirty:                 8 kB
Writeback:             0 kB
AnonPages:       2143692 kB
Mapped:             5904 kB
Shmem:                44 kB
KReclaimable:     131028 kB
Slab:             187672 kB
SReclaimable:     131028 kB
SUnreclaim:        56644 kB
KernelStack:        4848 kB
PageTables:         5428 kB
NFS_Unstable:          0 kB
Bounce:                0 kB
WritebackTmp:          0 kB
CommitLimit:     2322496 kB
Committed_AS:    2562120 kB
VmallocTotal:   135290159040 kB
VmallocUsed:       14120 kB
VmallocChunk:          0 kB
Percpu:              960 kB
HardwareCorrupted:     0 kB
CmaTotal:         327680 kB
CmaFree:          324464 kB
root@zzl:/system/lib/modules/5.10.59/kernel/samples/kprobes# dmesg -c
[ 2513.475944] Planted kprobe at 0000000041e7825b
[ 2520.404196] <meminfo_proc_show> pre_handler: p->addr = 0x0000000041e7825b, pc = 0xffff800010206a50, pstate = 0x400005
[ 2520.404213] <meminfo_proc_show> post_handler: p->addr = 0x0000000041e7825b, pstate = 0x400005
root@zzl:/system/lib/modules/5.10.59/kernel/samples/kprobes# 

kretprobe

示例函数检测的函数是kernel_clone

static char func_name[NAME_MAX] = "kernel_clone";
module_param_string(func, func_name, NAME_MAX, S_IRUGO);

默认探测的符号是kernel_clone,加载模块时可以通过func参数把探测目标改为meminfo_proc_show函数。

示范

root@zzl:/system/lib/modules/5.10.59/kernel/samples/kprobes# insmod kretprobe_example.ko func=meminfo_proc_show
root@zzl:/system/lib/modules/5.10.59/kernel/samples/kprobes# cat /proc/meminfo 
MemTotal:        4644992 kB
MemFree:         2211796 kB
MemAvailable:    2086168 kB
Buffers:           18124 kB
Cached:            36780 kB
SwapCached:            0 kB
Active:            20040 kB
Inactive:        2161372 kB
Active(anon):        128 kB
Inactive(anon):  2143588 kB
Active(file):      19912 kB
Inactive(file):    17784 kB
Unevictable:       17140 kB
Mlocked:               0 kB
SwapTotal:             0 kB
SwapFree:              0 kB
Dirty:                 8 kB
Writeback:             0 kB
AnonPages:       2143692 kB
Mapped:             5904 kB
Shmem:                44 kB
KReclaimable:     130952 kB
Slab:             187628 kB
SReclaimable:     130952 kB
SUnreclaim:        56676 kB
KernelStack:        4832 kB
PageTables:         5428 kB
NFS_Unstable:          0 kB
Bounce:                0 kB
WritebackTmp:          0 kB
CommitLimit:     2322496 kB
Committed_AS:    2562120 kB
VmallocTotal:   135290159040 kB
VmallocUsed:       14088 kB
VmallocChunk:          0 kB
Percpu:              960 kB
HardwareCorrupted:     0 kB
CmaTotal:         327680 kB
CmaFree:          324464 kB
root@zzl:/system/lib/modules/5.10.59/kernel/samples/kprobes# dmesg 
[ 2796.385110] Planted return probe at meminfo_proc_show: 0000000041e7825b
[ 2802.299921] meminfo_proc_show returned 0 and took 31750 ns to execute
root@zzl:/system/lib/modules/5.10.59/kernel/samples/kprobes# 

jprobe

四、代码分析

kprobe_example.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * NOTE: This example works on x86, powerpc, MIPS, arm64 and s390.
 * Here's a sample kernel module showing the use of kprobes to dump a
 * stack trace and selected registers when kernel_clone() is called.
 *
 * For more information on theory of operation of kprobes, see
 * Documentation/trace/kprobes.rst
 *
 * You will see the trace data in /var/log/messages and on the console
 * whenever kernel_clone() is invoked to create a new process.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

/* Capacity, in bytes, of the buffer that holds the probed symbol's name. */
#define MAX_SYMBOL_LEN	64
/* Name of the symbol to probe; kernel_clone unless overridden at load time. */
static char symbol[MAX_SYMBOL_LEN] = "kernel_clone";
/* Expose the buffer as the "symbol" module parameter with mode 0644. */
module_param_string(symbol, symbol, sizeof(symbol), 0644);

/*
 * For each probe you need to allocate a kprobe structure.
 * Only the target symbol is set here; the handlers are attached in
 * kprobe_init() before registration.
 */
static struct kprobe kp = {
	.symbol_name	= symbol,
};

/*
 * kprobe pre_handler: called just before the probed instruction is executed.
 *
 * Logs the probed symbol, the probe address and the architecture's program
 * counter and status register taken from @regs.  Exactly one of the
 * architecture sections below is compiled in.
 *
 * @p:    the kprobe that fired
 * @regs: register state saved at the probe point
 *
 * Always returns 0.
 */
static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs)
{
#ifdef CONFIG_X86
	pr_info("<%s> pre_handler: p->addr = 0x%p, ip = 0x%lx, flags = 0x%lx\n",
		p->symbol_name, p->addr, regs->ip, regs->flags);
#endif
#ifdef CONFIG_PPC
	pr_info("<%s> pre_handler: p->addr = 0x%p, nip = 0x%lx, msr = 0x%lx\n",
		p->symbol_name, p->addr, regs->nip, regs->msr);
#endif
#ifdef CONFIG_MIPS
	pr_info("<%s> pre_handler: p->addr = 0x%p, epc = 0x%lx, status = 0x%lx\n",
		p->symbol_name, p->addr, regs->cp0_epc, regs->cp0_status);
#endif
#ifdef CONFIG_ARM64
	pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx,"
			" pstate = 0x%lx\n",
		p->symbol_name, p->addr, (long)regs->pc, (long)regs->pstate);
#endif
#ifdef CONFIG_S390
	pr_info("<%s> pre_handler: p->addr = 0x%p, ip = 0x%lx, flags = 0x%lx\n",
		p->symbol_name, p->addr, regs->psw.addr, regs->flags);
#endif

	/* A dump_stack() here will give a stack backtrace */
	return 0;
}

/*
 * kprobe post_handler: called after the probed instruction is executed.
 *
 * Logs the probed symbol, the probe address and the architecture's status
 * register taken from @regs.  Exactly one of the architecture sections below
 * is compiled in.
 *
 * @p:     the kprobe that fired
 * @regs:  register state after the probed instruction
 * @flags: not used by this sample
 */
static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs,
				unsigned long flags)
{
#ifdef CONFIG_X86
	pr_info("<%s> post_handler: p->addr = 0x%p, flags = 0x%lx\n",
		p->symbol_name, p->addr, regs->flags);
#endif
#ifdef CONFIG_PPC
	pr_info("<%s> post_handler: p->addr = 0x%p, msr = 0x%lx\n",
		p->symbol_name, p->addr, regs->msr);
#endif
#ifdef CONFIG_MIPS
	pr_info("<%s> post_handler: p->addr = 0x%p, status = 0x%lx\n",
		p->symbol_name, p->addr, regs->cp0_status);
#endif
#ifdef CONFIG_ARM64
	pr_info("<%s> post_handler: p->addr = 0x%p, pstate = 0x%lx\n",
		p->symbol_name, p->addr, (long)regs->pstate);
#endif
#ifdef CONFIG_S390
	/* Label this line as coming from the post handler, like the other arches. */
	pr_info("<%s> post_handler: p->addr = 0x%p, flags = 0x%lx\n",
		p->symbol_name, p->addr, regs->flags);
#endif
}

/*
 * fault_handler: this is called if an exception is generated for any
 * instruction within the pre- or post-handler, or when Kprobes
 * single-steps the probed instruction.
 *
 * @p:      the kprobe being processed when the fault occurred
 * @regs:   register state at the time of the fault (unused here)
 * @trapnr: trap number, logged as-is
 *
 * Logs the probe address and trap number and returns 0.
 */
static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
{
	/* Terminate the message with a real newline so the log line is complete. */
	pr_info("fault_handler: p->addr = 0x%p, trap #%d\n", p->addr, trapnr);
	/* Return 0 because we don't handle the fault. */
	return 0;
}
/* NOKPROBE_SYMBOL() is also available */
NOKPROBE_SYMBOL(handler_fault);

/*
 * Module entry point: attach the three handlers to the file-scope kprobe and
 * register it.
 *
 * Returns 0 on success, or the negative value returned by register_kprobe()
 * when registration fails.
 */
static int __init kprobe_init(void)
{
	int err;

	kp.fault_handler = handler_fault;
	kp.post_handler = handler_post;
	kp.pre_handler = handler_pre;

	err = register_kprobe(&kp);
	if (err >= 0) {
		pr_info("Planted kprobe at %p\n", kp.addr);
		return 0;
	}

	pr_err("register_kprobe failed, returned %d\n", err);
	return err;
}

/*
 * Module exit point: unregister the kprobe, then log the address it had
 * been planted at.
 */
static void __exit kprobe_exit(void)
{
	unregister_kprobe(&kp);
	pr_info("kprobe at %p unregistered\n", kp.addr);
}

/* Declare the module's init/exit routines and its license. */
module_init(kprobe_init)
module_exit(kprobe_exit)
MODULE_LICENSE("GPL");

kretprobe_example.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * kretprobe_example.c
 *
 * Here's a sample kernel module showing the use of return probes to
 * report the return value and total time taken for probed function
 * to run.
 *
 * usage: insmod kretprobe_example.ko func=<func_name>
 *
 * If no func_name is specified, kernel_clone is instrumented
 *
 * For more information on theory of operation of kretprobes, see
 * Documentation/trace/kprobes.rst
 *
 * Build and insert the kernel module as done in the kprobe example.
 * You will see the trace data in /var/log/messages and on the console
 * whenever the probed function returns. (Some messages may be suppressed
 * if syslogd is configured to eliminate duplicate messages.)
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/ktime.h>
#include <linux/limits.h>
#include <linux/sched.h>

/* Name of the function to probe; kernel_clone unless overridden at load time. */
static char func_name[NAME_MAX] = "kernel_clone";
/* Expose the buffer as the read-only (S_IRUGO) "func" module parameter. */
module_param_string(func, func_name, NAME_MAX, S_IRUGO);
MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the"
			" function's execution time");

/*
 * Per-instance private data: one copy lives in each kretprobe_instance and
 * carries the entry timestamp from entry_handler() to ret_handler().
 */
struct my_data {
	ktime_t entry_stamp;	/* ktime_get() value taken at function entry */
};

/* Here we use the entry_hanlder to timestamp function entry */
static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct my_data *data;

	if (!current->mm)
		return 1;	/* Skip kernel threads */

	data = (struct my_data *)ri->data;
	data->entry_stamp = ktime_get();
	return 0;
}
NOKPROBE_SYMBOL(entry_handler);

/*
 * Return-probe handler: report the probed function's return value and how
 * long it ran, measured from the timestamp that entry_handler() stored in
 * the instance data.  The duration may consistently read as zero, depending
 * on the granularity of time accounting on the platform.
 *
 * @ri:   the instance for this invocation; its data area holds struct my_data
 * @regs: register state at function return
 *
 * Always returns 0.
 */
static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	unsigned long rc = regs_return_value(regs);
	struct my_data *priv = (struct my_data *)ri->data;
	ktime_t elapsed = ktime_sub(ktime_get(), priv->entry_stamp);

	pr_info("%s returned %lu and took %lld ns to execute\n",
		func_name, rc, (long long)ktime_to_ns(elapsed));
	return 0;
}
NOKPROBE_SYMBOL(ret_handler);

/*
 * The return probe itself.  The target symbol is not set here; it is
 * assigned from func_name in kretprobe_init() before registration.
 */
static struct kretprobe my_kretprobe = {
	/* Room for one struct my_data in every instance. */
	.data_size		= sizeof(struct my_data),
	/* Probe up to 20 instances concurrently. */
	.maxactive		= 20,
	.entry_handler		= entry_handler,
	.handler		= ret_handler,
};

/*
 * Module entry point: point the return probe at func_name and register it.
 *
 * Returns 0 on success, or the negative value returned by
 * register_kretprobe() when registration fails, so that the loader reports
 * the real cause instead of a generic -1.
 */
static int __init kretprobe_init(void)
{
	int ret;

	my_kretprobe.kp.symbol_name = func_name;
	ret = register_kretprobe(&my_kretprobe);
	if (ret < 0) {
		pr_err("register_kretprobe failed, returned %d\n", ret);
		return ret;
	}
	pr_info("Planted return probe at %s: %p\n",
			my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr);
	return 0;
}

/*
 * Module exit point: unregister the return probe, then log the address it
 * had been planted at and the number of missed instances.
 */
static void __exit kretprobe_exit(void)
{
	unregister_kretprobe(&my_kretprobe);
	pr_info("kretprobe at %p unregistered\n", my_kretprobe.kp.addr);

	/* nmissed > 0 suggests that maxactive was set too low. */
	pr_info("Missed probing %d instances of %s\n",
		my_kretprobe.nmissed, my_kretprobe.kp.symbol_name);
}

/* Declare the module's init/exit routines and its license. */
module_init(kretprobe_init)
module_exit(kretprobe_exit)
MODULE_LICENSE("GPL");

jprobe_example.c

/*
 * Here's a sample kernel module showing the use of jprobes to dump
 * the arguments of do_fork().
 *
 * For more information on theory of operation of jprobes, see
 * Documentation/kprobes.txt
 *
 * Build and insert the kernel module as done in the kprobe example.
 * You will see the trace data in /var/log/messages and on the
 * console whenever do_fork() is invoked to create a new process.
 * (Some messages may be suppressed if syslogd is configured to
 * eliminate duplicate messages.)
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

/*
 * Jumper probe for do_fork.
 * Mirror principle enables access to arguments of the probed routine
 * from the probe handler.
 */

/*
 * Proxy routine with the same argument list as the real do_fork().
 * It logs clone_flags, stack_size and regs, then hands control back through
 * jprobe_return().
 */
static long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
		     struct pt_regs *regs, unsigned long stack_size,
		     int __user *parent_tidptr, int __user *child_tidptr)
{
	printk(KERN_INFO
	       "jprobe: clone_flags = 0x%lx, stack_size = 0x%lx, regs = 0x%p\n",
	       clone_flags, stack_size, regs);

	/* Always end with a call to jprobe_return(). */
	jprobe_return();
	return 0;
}

/* The jprobe: run jdo_fork whenever the symbol "do_fork" is called. */
static struct jprobe my_jprobe = {
	.entry			= jdo_fork,
	.kp = {
		.symbol_name	= "do_fork",
	},
};

/*
 * Module entry point: register the jprobe.
 *
 * Returns 0 on success, or the negative value returned by register_jprobe()
 * when registration fails.  The failure is logged at error level.
 */
static int __init jprobe_init(void)
{
	int ret;

	ret = register_jprobe(&my_jprobe);
	if (ret < 0) {
		printk(KERN_ERR "register_jprobe failed, returned %d\n", ret);
		return ret;
	}
	printk(KERN_INFO "Planted jprobe at %p, handler addr %p\n", my_jprobe.kp.addr, my_jprobe.entry);
	return 0;
}

/*
 * Module exit point: unregister the jprobe, then log the address it had
 * been planted at.
 */
static void __exit jprobe_exit(void)
{
	unregister_jprobe(&my_jprobe);
	printk(KERN_INFO "jprobe at %p unregistered\n", my_jprobe.kp.addr);
}

/* Declare the module's init/exit routines and its license. */
module_init(jprobe_init)
module_exit(jprobe_exit)
MODULE_LICENSE("GPL");

六、kprobe VS kretprobe VS jprobe

kretprobe底层通过kprobe来实现,与kprobe的不同在于实现了一系列辅助的函数来支持instance的实例化,这样在每次运行到对应的函数/地址时会自动生成一个新的private data,从而实现并发和多实例.

从官方的例子就可以看到,kretprobe可以实现统计函数运行时间的功能,但是简单的kprobe就不行(因为没有private data)。

kprobe的回调中,kprobe是全局共享的,所以没有简单的办法去追踪每次的函数调用。而kretprobe的回调会有instance的概念,并且可以在kretprobe_instance中携带private data.

简而言之,kretprobe会处理好多实例的问题,并且对每次的函数进入,退出都有对应的private data可供保存对应的数据。而kprobe只能探测单条指令,在指令运行前后执行注册进去的callback.

总结:

  1. Kprobe(内核探测点):Kprobe 允许开发人员在内核中几乎任意一条指令上(最常见的是函数入口)设置探测点,并在该指令执行前、后分别调用 pre_handler 和 post_handler。借助这些回调可以监视函数参数、修改数据,或者收集性能数据。Kprobe 机制通过把探测点处的指令替换为断点指令来实现,使用 struct kprobe 结构体来描述探测点位置和处理函数。

  2. Kretprobe(内核返回探测点):Kretprobe 类似于 Kprobe,但它专门用于在内核函数返回时触发。Kretprobe 允许开发人员检查函数的返回值、修改返回值,或者在函数返回时执行其他操作。Kretprobe 使用 struct kretprobe 结构体(其中内嵌了一个 struct kprobe),并通过 entry_handler 和 handler 两个回调分别处理函数进入和函数返回。

  3. Jprobe:Jprobe 是 Kprobe 的一种变体,用于跟踪内核函数并获取其参数。开发人员需要指定一个与被探测函数参数列表相同的代理函数;当被跟踪的函数被调用时,Jprobe 会先执行这个代理函数,代理函数必须以 jprobe_return() 结束,随后才继续执行原函数,因此可以用来查看参数、调试或做性能分析。Jprobe 使用 struct jprobe 结构体来描述要跟踪的函数以及代理函数的地址。注意:jprobe 接口在较新的内核(4.15 之后)中已被废弃并移除,在本文使用的 5.10 内核上无法使用。

五、实现原理

使用objdump反汇编vmlinux文件,查看内核的汇编代码:

aarch64-linux-gnu-objdump -d out/build/kernel/vmlinux > linux.txt