From: Dan Carpenter <dan.carpenter@oracle.com>
To: Elena Reshetova <elena.reshetova@intel.com>
Cc: smatch@vger.kernel.org
Subject: Re: [PATCH] Quick trial on tracing host inputs
Date: Tue, 8 Mar 2022 14:34:16 +0300 [thread overview]
Message-ID: <20220308113416.GG3315@kadam> (raw)
In-Reply-To: <20220308085630.551547-1-elena.reshetova@intel.com>
On Tue, Mar 08, 2022 at 10:56:30AM +0200, Elena Reshetova wrote:
> This is just a quick trial to trace inputs received
> from the host/VMM in the same way as user inputs.
>
> Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
> ---
> smatch_kernel_host_data.c | 1320 ++++++++++++++++++++++++++++++++++
> smatch_points_to_host_data.c | 334 +++++++++
The changes to smatch.h and check_list.h are missing.
> 2 files changed, 1654 insertions(+)
> create mode 100755 smatch_kernel_host_data.c
> create mode 100755 smatch_points_to_host_data.c
>
> diff --git a/smatch_kernel_host_data.c b/smatch_kernel_host_data.c
> new file mode 100755
> index 00000000..540875c5
> --- /dev/null
> +++ b/smatch_kernel_host_data.c
> @@ -0,0 +1,1320 @@
> +/*
> + * Copyright (C) 2011 Dan Carpenter.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, see http://www.gnu.org/copyleft/gpl.txt
> + */
> +
> +/* Note: The below code is just a quick trial to modify the
> + * smatch_kernel_host_data.c to work on data received from a
> + * untrusted host/VMM.
> + * Similar as smatch_kernel_user_data.c it works with
> + * smatch_points_to_host_data.c code. It also uses some helper functions
> + * from the check_host_input.c pattern.
> + */
> +
> +#include "smatch.h"
> +#include "smatch_slist.h"
> +#include "smatch_extra.h"
> +#include <math.h>
> +
> +const char *host_input_funcs[] = {
> + "inb", "inw", "inl", "inb_p", "inw_p", "inl_p", "insb", "insw", "insl", "get_dma_residue", "ioread8", "ioread16", "ioread32",
> + "ioread16be", "ioread32be", "ioread64_lo_hi", "ioread64_hi_lo", "ioread64be_lo_hi", "ioread64be_hi_lo", "ioread8_rep",
> + "ioread16_rep", "ioread32_rep", "__ioread32_copy", "iomap_readq", "iomap_readb", "iomap_readw", "iomap_readl", "memcpy_fromio",
> + "mmio_insb", "mmio_insw", "mmio_insl", "readb", "readw", "readl", "readq", "readsb", "readsw", "readsl", "readsq", "__readb", "__readw",
> + "__readl", "__readq", "__readsb", "__readsw", "__readsl", "__readsq", "__raw_readb", "__raw_readw", "__raw_readl", "__raw_readq",
> + "lo_hi_readq", "hi_lo_readq", "lo_hi_readq_relaxed", "hi_lo_readq_relaxed", "readb_relaxed", "readw_relaxed", "readl_relaxed",
> + "readq_relaxed", "native_read_msr", "native_read_msr_safe", "__rdmsr", "rdmsrl", "rdmsrl_safe", "rdmsr_on_cpu", "rdmsrl_on_cpu",
> + "rdmsr_on_cpus", "rdmsr_safe_on_cpu", "rdmsrl_safe_on_cpu", "paravirt_read_msr", "paravirt_read_msr_safe", "read_msr", "msr_read",
> + "native_apic_msr_read", "native_apic_mem_read", "native_apic_icr_read", "apic_read", "apic_icr_read", "native_x2apic_icr_read",
> + "io_apic_read", "native_io_apic_read", "__ioapic_read_entry", "ioapic_read_entry", "vp_ioread8", "vp_ioread16", "vp_ioread32",
> + "__virtio_cread_many", "virtio_cread", "virtio_cread_le", "virtio_cread8", "virtio_cread16", "virtio_cread32", "virtio_cread64",
> + "virtio_cread_bytes", "virtio16_to_cpu", "virtio32_to_cpu", "virtio64_to_cpu", "__virtio16_to_cpu", "__virtio32_to_cpu",
> + "__virtio64_to_cpu", "virtqueue_get_buf", "vringh16_to_cpu", "vringh32_to_cpu", "vringh64_to_cpu", "tap16_to_cpu", "tun16_to_cpu",
> + "read_pci_config", "read_pci_config_byte", "read_pci_config_16", "raw_pci_read", "pci_read", "pci_read_config_byte",
> + "pci_read_config_word", "pci_read_config_dword", "pci_bus_read_config_byte", "pci_bus_read_config_word",
> + "pci_bus_read_config_dword", "pci_generic_config_read", "pci_generic_config_read32", "pci_user_read_config_byte",
> + "pci_user_read_config_word", "pci_user_read_config_dword", "pcie_capability_read_word", "pcie_capability_read_dword",
> + "pci_read_vpd", "serial8250_early_in", "serial_dl_read", "serial8250_in_MCR", "serial_in", "serial_port_in", "serial_icr_read",
> + "serial8250_rx_chars", "dw8250_readl_ext", "udma_readl", "sio_read_reg", "irq_readl_be", "irq_reg_readl", "fw_cfg_read_blob",
> + "acpi_os_read_iomem", "acpi_os_read_port", "acpi_hw_read_multiple", "acpi_hw_read", "acpi_hw_read_port", "acpi_hw_register_read",
> + "acpi_hw_gpe_read", "apei_read", "acpi_read", "__apei_exec_read_register", "cpc_read", "hv_get_register", "iosf_mbi_read",
> + "cpuid", "cpuid_count", "cpuid_eax", "cpuid_ebx", "cpuid_ecx", "cpuid_edx"
> +
> +};
> +
> +
> +static int my_id;
> +static int my_call_id;
> +
> +STATE(called);
This state is not used here. Even in smatch_kernel_user_data.c it
should be moved out of there into a separate file...
> +static unsigned long func_gets_host_data;
> +static struct stree *start_states;
> +
> +static void save_start_states(struct statement *stmt)
> +{
> + start_states = clone_stree(__get_cur_stree());
> +}
> +
> +static void free_start_states(void)
> +{
> + free_stree(&start_states);
> +}
> +
No need to do this these days. It's stored in get_start_states().
> +static struct smatch_state *empty_state(struct sm_state *sm)
> +{
> + return alloc_estate_empty();
> +}
> +
> +static struct smatch_state *new_state(struct symbol *type)
> +{
> + struct smatch_state *state;
> +
> + if (!type || type_is_ptr(type))
> + return NULL;
> +
> + state = alloc_estate_whole(type);
> + estate_set_new(state);
> +
> + return state;
This code is supposed to differentiate between places where we return
user data because it was passed to us or we got fresh user data inside
the function. In smatch_kernel_user_data.c it's only a single question
"did we recieve user data? Y/N." But it should be "Was variable foo->bar
user data."
I would probably pull returned user data out into a separate file these
days. And have a different file for if a variable currently holds user
data or if we pass user data to a function.
> +}
> +
> +static void pre_merge_hook(struct sm_state *cur, struct sm_state *other)
> +{
> + struct smatch_state *user = cur->state;
> + struct smatch_state *extra;
> + struct smatch_state *state;
> + struct range_list *rl;
> +
> + extra = __get_state(SMATCH_EXTRA, cur->name, cur->sym);
> + if (!extra)
> + return;
> + rl = rl_intersection(estate_rl(user), estate_rl(extra));
> + state = alloc_estate_rl(clone_rl(rl));
> + if (estate_capped(user) || is_capped_var_sym(cur->name, cur->sym))
> + estate_set_capped(state);
> + if (estate_treat_untagged(user))
> + estate_set_treat_untagged(state);
Tagged is for ARM tagged pointers. It's probably not something you
need. Search for "tag" and delete.
> + if (estates_equiv(state, cur->state))
> + return;
> + set_state(my_id, cur->name, cur->sym, state);
> +}
> +
[ snip ]
> +
> +extern uint get_arg_bitmask(struct expression *expr);
> +static struct expression *ignore_param_set;
> +extern bool is_ignored_func(struct expression *expr);
> +
> +static void match_host_input(const char *fn, struct expression *expr)
> +{
> +
> + uint arg_bitmask = 0;
> +
> + if (!expr)
> + return;
> +
> + arg_bitmask = get_arg_bitmask(expr);
> +
> + if (!arg_bitmask) /* function returns host data, handled via match_returns_host_rl */
> + return;
> +
> + if (is_ignored_func(expr))
> + return;
> +
> + func_gets_host_data = true;
> + ignore_param_set = expr;
> +
> + switch((uint)log2(arg_bitmask)) {
> + case 0xC:
> + tag_argument(expr, 2);
> + tag_argument(expr, 3);
> + break;
> + case 0x36:
> + tag_argument(expr, 1);
> + tag_argument(expr, 2);
> + tag_argument(expr, 3);
> + tag_argument(expr, 4);
> + break;
> + case 0x74:
> + tag_argument(expr, 2);
> + tag_argument(expr, 3);
> + tag_argument(expr, 4);
> + tag_argument(expr, 5);
> + break;
> + default:
> + tag_argument(expr, (uint)log2(arg_bitmask));
> + break;
> + }
> +
> + return;
> +}
This function would be better re-written with param key API. Sorry
that Smatch doesn't have documentation. :( An example, of how that
works is in check_unwind.c, instead of a bitmap you would just have
multiple entries in the table.
I'm typing directly into my email client so this might not compile...
struct host_fn_info {
const char *name;
int type;
int param;
const char *key;
const sval_t *implies_start, *implies_end;
func_hook *call_back;
};
static struct host_fn_info func_table[] = {
{ "memcpy_fromio", HOST_DATA, 0, "*$" },
...
{ "cpuid", HOST_DATA, 1, "*$" },
{ "cpuid", HOST_DATA, 2, "*$" },
{ "cpuid", HOST_DATA, 3, "*$" },
{ "cpuid", HOST_DATA, 4, "*$" },
};
static void set_param_host_data(struct expression *expr, const char *name, struct symbol *sym, void *data)
{
struct expression *arg;
struct range_list *rl;
arg = gen_expression_from_name_sym(name, sym);
if (strcmp(key, "*$") == 0) {
tag_as_user_data(arg);
return;
}
// make sure that smatch_extra was run first
get_absolute_rl(arg, &rl);
set_state(my_id, name, sym, alloc_estate_rl(rl));
}
The code you have should work though and it should propagate.
What we need is some simple test cases.
//---
void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
unsigned int a, b, c, d e;
unsigned int test(void)
{
cpuid(0, &a, &b, &c, &d);
__smatch_states("host");
return a;
}
unsigned int test2(void)
{
unsigned int x = test();
__smatch_states("host");
}
regards,
dan carpenter
next prev parent reply other threads:[~2022-03-08 11:35 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-03-08 8:56 [PATCH] Quick trial on tracing host inputs Elena Reshetova
2022-03-08 11:34 ` Dan Carpenter [this message]
2022-03-08 11:37 ` Dan Carpenter
2022-03-08 12:38 ` Dan Carpenter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220308113416.GG3315@kadam \
--to=dan.carpenter@oracle.com \
--cc=elena.reshetova@intel.com \
--cc=smatch@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.