diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 49bbe9fd85b3..43e0290ce6d8 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -24,6 +24,8 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
 
 kvm-$(CONFIG_HW_PERF_EVENTS)  += pmu-emul.o pmu.o
 
+kvm-$(CONFIG_TRACING) += hyp_trace.o
+
 always-y := hyp_constants.h hyp-constants.s
 
 define rule_gen_hyp_constants
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index dab1856a3f20..379d6efe5c08 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -27,6 +27,8 @@
 #define CREATE_TRACE_POINTS
 #include "trace_arm.h"
 
+#include "hyp_trace.h"
+
 #include
 #include
 #include
@@ -2391,6 +2393,10 @@ int kvm_arch_init(void *opaque)
 			kvm_err("Failed to finalize Hyp protection\n");
 			goto out_hyp;
 		}
+
+		err = init_hyp_tracefs();
+		if (err)
+			kvm_err("Failed to initialize Hyp tracing\n");
 	}
 
 	if (is_protected_kvm_enabled()) {
diff --git a/arch/arm64/kvm/hyp/hyp-constants.c b/arch/arm64/kvm/hyp/hyp-constants.c
index b257a3b4bfc5..9fe0d2a624ef 100644
--- a/arch/arm64/kvm/hyp/hyp-constants.c
+++ b/arch/arm64/kvm/hyp/hyp-constants.c
@@ -3,11 +3,16 @@
 #include
 #include
 #include
+#include
 
 int main(void)
 {
 	DEFINE(STRUCT_HYP_PAGE_SIZE,	sizeof(struct hyp_page));
 	DEFINE(PKVM_HYP_VM_SIZE,	sizeof(struct pkvm_hyp_vm));
 	DEFINE(PKVM_HYP_VCPU_SIZE,	sizeof(struct pkvm_hyp_vcpu));
+	/* Same Kconfig symbol as the Makefile rule building hyp_trace.o, its user */
+#ifdef CONFIG_TRACING
+	DEFINE(STRUCT_HYP_BUFFER_PAGE_SIZE,	sizeof(struct hyp_buffer_page));
+#endif
 	return 0;
 }
diff --git a/arch/arm64/kvm/hyp_trace.c b/arch/arm64/kvm/hyp_trace.c
new file mode 100644
index 000000000000..514775697fc6
--- /dev/null
+++ b/arch/arm64/kvm/hyp_trace.c
@@ -0,0 +1,690 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Google LLC
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "hyp_constants.h"
+#include "hyp_trace.h"
+
+#define RB_POLL_MS 1000
+
+#define TRACEFS_DIR "hyp"
+
+static bool hyp_trace_on;
+static int hyp_trace_readers;
+static struct trace_buffer *hyp_trace_buffer; +static size_t hyp_trace_buffer_size = 7 << 10; +static struct hyp_buffer_pages_backing hyp_buffer_pages_backing; +static DEFINE_MUTEX(hyp_trace_lock); +static DEFINE_PER_CPU(struct mutex, hyp_trace_reader_lock); + +static int bpage_backing_setup(struct hyp_trace_pack *pack) +{ + size_t backing_size; + void *start; + + if (hyp_buffer_pages_backing.start) + return -EBUSY; + + backing_size = STRUCT_HYP_BUFFER_PAGE_SIZE * + pack->trace_buffer_pack.total_pages; + backing_size = PAGE_ALIGN(backing_size); + + start = alloc_pages_exact(backing_size, GFP_KERNEL_ACCOUNT); + if (!start) + return -ENOMEM; + + hyp_buffer_pages_backing.start = (unsigned long)start; + hyp_buffer_pages_backing.size = backing_size; + pack->backing.start = (unsigned long)start; + pack->backing.size = backing_size; + + return 0; +} + +static void bpage_backing_teardown(void) +{ + unsigned long backing = hyp_buffer_pages_backing.start; + + if (!hyp_buffer_pages_backing.start) + return; + + free_pages_exact((void *)backing, hyp_buffer_pages_backing.size); + + hyp_buffer_pages_backing.start = 0; + hyp_buffer_pages_backing.size = 0; +} + +/* + * Configure the hyp tracing clock. So far, only one is supported: "boot". This + * clock doesn't stop during suspend making it a good candidate. The downside is + * if this clock is corrected by NTP while tracing, the hyp clock will slightly + * drift compared to the host version. 
+ */ +static void hyp_clock_setup(struct hyp_trace_pack *pack) +{ + struct kvm_nvhe_clock_data *clock_data = &pack->trace_clock_data; + struct system_time_snapshot snap; + + ktime_get_snapshot(&snap); + + clock_data->epoch_cyc = snap.cycles; + clock_data->epoch_ns = snap.boot; + clock_data->mult = snap.mono_mult; + clock_data->shift = snap.mono_shift; +} + +static int __swap_reader_page(int cpu) +{ + return kvm_call_hyp_nvhe(__pkvm_rb_swap_reader_page, cpu); +} + +static int __update_footers(int cpu) +{ + return kvm_call_hyp_nvhe(__pkvm_rb_update_footers, cpu); +} + +struct ring_buffer_ext_cb hyp_cb = { + .update_footers = __update_footers, + .swap_reader = __swap_reader_page, +}; + +static inline int share_page(unsigned long va) +{ + return kvm_call_hyp_nvhe(__pkvm_host_share_hyp, virt_to_pfn(va), 1); +} + +static inline int unshare_page(unsigned long va) +{ + return kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, virt_to_pfn(va), 1); +} + +static int trace_pack_pages_apply(struct trace_buffer_pack *trace_pack, + int (*func)(unsigned long)) +{ + struct ring_buffer_pack *rb_pack; + int cpu, i, ret; + + for_each_ring_buffer_pack(rb_pack, cpu, trace_pack) { + ret = func(rb_pack->reader_page_va); + if (ret) + return ret; + + for (i = 0; i < rb_pack->nr_pages; i++) { + ret = func(rb_pack->page_va[i]); + if (ret) + return ret; + } + } + + return 0; +} + +/* + * hyp_trace_pack size depends on trace_buffer_pack's, so + * trace_buffer_setup is in charge of the allocation for the former. + */ +static int trace_buffer_setup(struct hyp_trace_pack **pack, size_t *pack_size) +{ + struct trace_buffer_pack *trace_pack; + int ret; + + hyp_trace_buffer = ring_buffer_alloc_ext(hyp_trace_buffer_size, &hyp_cb); + if (!hyp_trace_buffer) + return -ENOMEM; + + *pack_size = offsetof(struct hyp_trace_pack, trace_buffer_pack) + + trace_buffer_pack_size(hyp_trace_buffer); + /* + * The hypervisor will unmap the pack from the host to protect the + * reading. 
Page granularity for the pack allocation ensures no other + * useful data will be unmapped. + */ + *pack_size = PAGE_ALIGN(*pack_size); + *pack = alloc_pages_exact(*pack_size, GFP_KERNEL); + if (!*pack) { + ret = -ENOMEM; + goto err; + } + + trace_pack = &(*pack)->trace_buffer_pack; + WARN_ON(trace_buffer_pack(hyp_trace_buffer, trace_pack)); + + ret = trace_pack_pages_apply(trace_pack, share_page); + if (ret) { + trace_pack_pages_apply(trace_pack, unshare_page); + free_pages_exact(*pack, *pack_size); + goto err; + } + + return 0; +err: + ring_buffer_free(hyp_trace_buffer); + hyp_trace_buffer = NULL; + + return ret; +} + +static void trace_buffer_teardown(struct trace_buffer_pack *trace_pack) +{ + bool alloc_trace_pack = !trace_pack; + + if (alloc_trace_pack) { + trace_pack = kzalloc(trace_buffer_pack_size(hyp_trace_buffer), GFP_KERNEL); + if (!trace_pack) { + WARN_ON(1); + goto end; + } + } + + WARN_ON(trace_buffer_pack(hyp_trace_buffer, trace_pack)); + WARN_ON(trace_pack_pages_apply(trace_pack, unshare_page)); + + if (alloc_trace_pack) + kfree(trace_pack); +end: + ring_buffer_free(hyp_trace_buffer); + hyp_trace_buffer = NULL; +} + +static void hyp_free_tracing(void) +{ + if (!hyp_trace_buffer) + return; + + trace_buffer_teardown(NULL); + bpage_backing_teardown(); +} + +static int hyp_start_tracing(void) +{ + struct hyp_trace_pack *pack; + size_t pack_size; + int ret = 0; + + if (hyp_trace_on || hyp_trace_readers) + return -EBUSY; + + hyp_free_tracing(); + + ret = trace_buffer_setup(&pack, &pack_size); + if (ret) + return ret; + + hyp_clock_setup(pack); + + ret = bpage_backing_setup(pack); + if (ret) + goto end_buffer_teardown; + + ret = kvm_call_hyp_nvhe(__pkvm_start_tracing, (unsigned long)pack, pack_size); + if (!ret) { + hyp_trace_on = true; + goto end_free_pack; + } + + bpage_backing_teardown(); +end_buffer_teardown: + trace_buffer_teardown(&pack->trace_buffer_pack); +end_free_pack: + free_pages_exact(pack, pack_size); + + return ret; +} + +static void 
hyp_stop_tracing(void) +{ + int ret; + + if (!hyp_trace_buffer || !hyp_trace_on) + return; + + ret = kvm_call_hyp_nvhe(__pkvm_stop_tracing); + if (ret) { + WARN_ON(1); + return; + } + + hyp_trace_on = false; +} + +static ssize_t +hyp_tracing_on(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) +{ + int err = 0; + char c; + + if (cnt != 2) + return -EINVAL; + + if (get_user(c, ubuf)) + return -EFAULT; + + mutex_lock(&hyp_trace_lock); + + switch (c) { + case '1': + err = hyp_start_tracing(); + break; + case '0': + hyp_stop_tracing(); + break; + default: + err = -EINVAL; + } + + mutex_unlock(&hyp_trace_lock); + + return err ? err : cnt; +} + +static ssize_t hyp_tracing_on_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[3]; + int r; + + mutex_lock(&hyp_trace_lock); + r = sprintf(buf, "%d\n", hyp_trace_on); + mutex_unlock(&hyp_trace_lock); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static const struct file_operations hyp_tracing_on_fops = { + .write = hyp_tracing_on, + .read = hyp_tracing_on_read, +}; + +static ssize_t hyp_buffer_size(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + unsigned long val; + int ret; + + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); + if (ret) + return ret; + + if (!val) + return -EINVAL; + + mutex_lock(&hyp_trace_lock); + hyp_trace_buffer_size = val << 10; /* KB to B */ + mutex_unlock(&hyp_trace_lock); + + return cnt; +} + +static ssize_t hyp_buffer_size_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + int r; + + mutex_lock(&hyp_trace_lock); + r = sprintf(buf, "%lu\n", hyp_trace_buffer_size >> 10); + mutex_unlock(&hyp_trace_lock); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static const struct file_operations hyp_buffer_size_fops = { + .write = hyp_buffer_size, + .read = hyp_buffer_size_read, +}; + +static inline void hyp_trace_read_start(int cpu) +{ + 
mutex_lock(&per_cpu(hyp_trace_reader_lock, cpu)); +} + +static inline void hyp_trace_read_stop(int cpu) +{ + mutex_unlock(&per_cpu(hyp_trace_reader_lock, cpu)); +} + +static void ht_print_trace_time(struct ht_iterator *iter) +{ + unsigned long usecs_rem; + u64 ts_ns = iter->ts; + + do_div(ts_ns, 1000); + usecs_rem = do_div(ts_ns, USEC_PER_SEC); + + trace_seq_printf(&iter->seq, "[%5lu.%06lu] ", + (unsigned long)ts_ns, usecs_rem); +} + +static void ht_print_trace_fmt(struct ht_iterator *iter) +{ + if (iter->lost_events) + trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", + iter->cpu, iter->lost_events); + + /* TODO: format bin/hex/raw */ + + ht_print_trace_time(iter); +}; + +static void *ht_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct ht_iterator *iter = m->private; + struct ring_buffer_event *evt; + u64 ts; + + (*pos)++; + + evt = ring_buffer_iter_peek(iter->buf_iter, &ts); + if (!evt) + return NULL; + + iter->ent = (struct hyp_entry_hdr *)&evt->array[1]; + iter->ts = ts; + iter->ent_size = evt->array[0]; + ring_buffer_iter_advance(iter->buf_iter); + + return iter; +} + +static void *ht_start(struct seq_file *m, loff_t *pos) +{ + struct ht_iterator *iter = m->private; + + if (*pos == 0) { + ring_buffer_iter_reset(iter->buf_iter); + (*pos)++; + iter->ent = NULL; + + return iter; + } + + hyp_trace_read_start(iter->cpu); + + return ht_next(m, NULL, pos); +} + +static void ht_stop(struct seq_file *m, void *v) +{ + struct ht_iterator *iter = m->private; + + hyp_trace_read_stop(iter->cpu); +} + +static int ht_show(struct seq_file *m, void *v) +{ + struct ht_iterator *iter = v; + + if (!iter->ent) { + unsigned long entries, overrun; + + entries = ring_buffer_entries_cpu(hyp_trace_buffer, iter->cpu); + overrun = ring_buffer_overrun_cpu(hyp_trace_buffer, iter->cpu); + + seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu\n", + entries, overrun + entries); + } else { + ht_print_trace_fmt(iter); + trace_print_seq(m, &iter->seq); + } + + return 0; 
+} + +static const struct seq_operations hyp_trace_ops = { + .start = ht_start, + .next = ht_next, + .stop = ht_stop, + .show = ht_show, +}; + +static int hyp_trace_open(struct inode *inode, struct file *file) +{ + unsigned long cpu = (unsigned long)inode->i_private; + struct ht_iterator *iter; + int ret = 0; + + mutex_lock(&hyp_trace_lock); + + if (!hyp_trace_buffer) { + ret = -ENODEV; + goto unlock; + } + + iter = __seq_open_private(file, &hyp_trace_ops, sizeof(*iter)); + if (!iter) { + ret = -ENOMEM; + goto unlock; + } + + iter->buf_iter = ring_buffer_read_prepare(hyp_trace_buffer, cpu, GFP_KERNEL); + if (!iter->buf_iter) { + seq_release_private(inode, file); + ret = -ENOMEM; + goto unlock; + } + + iter->cpu = cpu; + + ring_buffer_read_prepare_sync(); + ring_buffer_read_start(iter->buf_iter); + + hyp_trace_readers++; +unlock: + mutex_unlock(&hyp_trace_lock); + + return ret; +} + +int hyp_trace_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = file->private_data; + struct ht_iterator *iter = m->private; + + ring_buffer_read_finish(iter->buf_iter); + + mutex_lock(&hyp_trace_lock); + hyp_trace_readers--; + mutex_unlock(&hyp_trace_lock); + + return seq_release_private(inode, file); +} + +static const struct file_operations hyp_trace_fops = { + .open = hyp_trace_open, + .read = seq_read, + .llseek = seq_lseek, + .release = hyp_trace_release, +}; + +/* + * TODO: should be merged with the ring_buffer_iterator version + */ +static void *trace_buffer_peek(struct ht_iterator *iter) +{ + struct ring_buffer_event *event; + + if (ring_buffer_empty_cpu(iter->trace_buffer, iter->cpu)) + return NULL; + + event = ring_buffer_peek(iter->trace_buffer, iter->cpu, &iter->ts, &iter->lost_events); + if (!event) + return NULL; + + iter->ent = (struct hyp_entry_hdr *)&event->array[1]; + iter->ent_size = event->array[0]; + + return iter; +} + +static ssize_t +hyp_trace_pipe_read(struct file *file, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct 
ht_iterator *iter = (struct ht_iterator *)file->private_data; + struct trace_buffer *trace_buffer = iter->trace_buffer; + int ret; + + trace_seq_init(&iter->seq); +again: + ret = ring_buffer_wait(trace_buffer, iter->cpu, 0); + if (ret < 0) + return ret; + + hyp_trace_read_start(iter->cpu); + while (trace_buffer_peek(iter)) { + unsigned long lost_events; + + ht_print_trace_fmt(iter); + ring_buffer_consume(iter->trace_buffer, iter->cpu, NULL, &lost_events); + } + hyp_trace_read_stop(iter->cpu); + + ret = trace_seq_to_user(&iter->seq, ubuf, cnt); + if (ret == -EBUSY) + goto again; + + return ret; +} + +static void __poke_reader(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct ht_iterator *iter; + + iter = container_of(dwork, struct ht_iterator, poke_work); + + WARN_ON_ONCE(ring_buffer_poke(iter->trace_buffer, iter->cpu)); + + schedule_delayed_work((struct delayed_work *)work, + msecs_to_jiffies(RB_POLL_MS)); +} + +static int hyp_trace_pipe_open(struct inode *inode, struct file *file) +{ + unsigned long cpu = (unsigned long)inode->i_private; + struct ht_iterator *iter; + int ret = -EINVAL; + + mutex_lock(&hyp_trace_lock); + + if (!hyp_trace_buffer) + goto unlock; + + ret = ring_buffer_poke(hyp_trace_buffer, cpu); + if (ret) + goto unlock; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) { + ret = -ENOMEM; + goto unlock; + } + + iter->cpu = cpu; + iter->trace_buffer = hyp_trace_buffer; + + INIT_DELAYED_WORK(&iter->poke_work, __poke_reader); + schedule_delayed_work(&iter->poke_work, msecs_to_jiffies(RB_POLL_MS)); + + file->private_data = iter; + + hyp_trace_readers++; +unlock: + mutex_unlock(&hyp_trace_lock); + + return ret; +} + +static int hyp_trace_pipe_release(struct inode *inode, struct file *file) +{ + struct ht_iterator *iter = file->private_data; + + cancel_delayed_work_sync(&iter->poke_work); + + kfree(iter); + + mutex_lock(&hyp_trace_lock); + hyp_trace_readers--; + mutex_unlock(&hyp_trace_lock); + + return 
0; +} + +static const struct file_operations hyp_trace_pipe_fops = { + .open = hyp_trace_pipe_open, + .read = hyp_trace_pipe_read, + .release = hyp_trace_pipe_release, + .llseek = no_llseek, +}; + +static void hyp_tracefs_create_cpu_file(const char *file_name, + unsigned long cpu, + const struct file_operations *fops, + struct dentry *parent) +{ + if (!tracefs_create_file(file_name, 0440, parent, (void *)cpu, fops)) + pr_warn("Failed to create tracefs %pd/%s\n", parent, file_name); +} + +int init_hyp_tracefs(void) +{ + struct dentry *d, *root_dir, *per_cpu_root_dir; + char per_cpu_name[16]; + unsigned long cpu; + + if (!is_protected_kvm_enabled()) + return 0; + + root_dir = tracefs_create_dir(TRACEFS_DIR, NULL); + if (!root_dir) { + pr_err("Failed to create tracefs "TRACEFS_DIR"/\n"); + return -ENODEV; + } + + d = tracefs_create_file("tracing_on", 0640, root_dir, NULL, + &hyp_tracing_on_fops); + if (!d) { + pr_err("Failed to create tracefs "TRACEFS_DIR"/tracing_on\n"); + return -ENODEV; + } + + d = tracefs_create_file("buffer_size_kb", 0640, root_dir, NULL, + &hyp_buffer_size_fops); + if (!d) + pr_err("Failed to create tracefs "TRACEFS_DIR"/buffer_size_kb\n"); + + + per_cpu_root_dir = tracefs_create_dir("per_cpu", root_dir); + if (!per_cpu_root_dir) { + pr_err("Failed to create tracefs "TRACEFS_DIR"/per_cpu/\n"); + return -ENODEV; + } + + for_each_possible_cpu(cpu) { + struct dentry *dir; + + snprintf(per_cpu_name, sizeof(per_cpu_name), "cpu%lu", cpu); + dir = tracefs_create_dir(per_cpu_name, per_cpu_root_dir); + if (!dir) { + pr_warn("Failed to create tracefs "TRACEFS_DIR"/per_cpu/cpu%lu\n", + cpu); + continue; + } + + hyp_tracefs_create_cpu_file("trace", cpu, &hyp_trace_fops, dir); + hyp_tracefs_create_cpu_file("trace_pipe", cpu, + &hyp_trace_pipe_fops, dir); + } + + return 0; +} diff --git a/arch/arm64/kvm/hyp_trace.h b/arch/arm64/kvm/hyp_trace.h new file mode 100644 index 000000000000..7d062ec9bf19 --- /dev/null +++ b/arch/arm64/kvm/hyp_trace.h @@ -0,0 +1,26 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ARM64_KVM_HYP_TRACE_H__
+#define __ARM64_KVM_HYP_TRACE_H__
+
+#include
+#include
+
+struct ht_iterator {	/* reader state for one open of a per-CPU "trace" or "trace_pipe" file */
+	struct ring_buffer_iter *buf_iter;	/* "trace": iterator from ring_buffer_read_prepare() */
+	struct trace_buffer *trace_buffer;	/* "trace_pipe": buffer being peeked and consumed */
+	struct hyp_entry_hdr *ent;		/* current event payload; NULL makes ht_show() print the header */
+	struct trace_seq seq;			/* staging area for the formatted output */
+	u64 ts;					/* current event timestamp; printed as nanoseconds */
+	size_t ent_size;			/* taken from array[0] of the current event */
+	struct delayed_work poke_work;		/* "trace_pipe": periodic ring_buffer_poke(), every RB_POLL_MS */
+	unsigned long lost_events;		/* "trace_pipe": count reported by ring_buffer_peek() */
+	int cpu;				/* CPU whose ring buffer is read */
+};
+
+#ifdef CONFIG_TRACING	/* same symbol that builds hyp_trace.o in the Makefile */
+int init_hyp_tracefs(void);	/* returns 0 or a negative errno */
+#else
+static inline int init_hyp_tracefs(void) { return 0; }	/* stub when hyp_trace.o is not built */
+#endif /* CONFIG_TRACING */
+#endif /* __ARM64_KVM_HYP_TRACE_H__ */