Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions fact-ebpf/src/bpf/events.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ __always_inline static void __submit_event(struct event_t* event,
file_activity_type_t event_type,
const char filename[PATH_MAX],
inode_key_t* inode,
inode_key_t* parent_inode,
bool use_bpf_d_path) {
event->type = event_type;
event->timestamp = bpf_ktime_get_boot_ns();
inode_copy_or_reset(&event->inode, inode);
inode_copy_or_reset(&event->parent_inode, parent_inode);
bpf_probe_read_str(event->filename, PATH_MAX, filename);

struct helper_t* helper = get_helper();
Expand All @@ -46,31 +48,34 @@ __always_inline static void __submit_event(struct event_t* event,
__always_inline static void submit_open_event(struct metrics_by_hook_t* m,
file_activity_type_t event_type,
const char filename[PATH_MAX],
inode_key_t* inode) {
inode_key_t* inode,
inode_key_t* parent_inode) {
struct event_t* event = bpf_ringbuf_reserve(&rb, sizeof(struct event_t), 0);
if (event == NULL) {
m->ringbuffer_full++;
return;
}

__submit_event(event, m, event_type, filename, inode, true);
__submit_event(event, m, event_type, filename, inode, parent_inode, true);
}

__always_inline static void submit_unlink_event(struct metrics_by_hook_t* m,
const char filename[PATH_MAX],
inode_key_t* inode) {
inode_key_t* inode,
inode_key_t* parent_inode) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The parent inode was added to the event so that it could be used to find the parent path. It is added for all event types, not just when a file is opened. Perhaps NULL could be passed instead of the actual parent inode for the other type of events for now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, passing NULL should be enough for now

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

struct event_t* event = bpf_ringbuf_reserve(&rb, sizeof(struct event_t), 0);
if (event == NULL) {
m->ringbuffer_full++;
return;
}

__submit_event(event, m, FILE_ACTIVITY_UNLINK, filename, inode, path_hooks_support_bpf_d_path);
__submit_event(event, m, FILE_ACTIVITY_UNLINK, filename, inode, parent_inode, path_hooks_support_bpf_d_path);
}

__always_inline static void submit_mode_event(struct metrics_by_hook_t* m,
const char filename[PATH_MAX],
inode_key_t* inode,
inode_key_t* parent_inode,
umode_t mode,
umode_t old_mode) {
struct event_t* event = bpf_ringbuf_reserve(&rb, sizeof(struct event_t), 0);
Expand All @@ -82,12 +87,13 @@ __always_inline static void submit_mode_event(struct metrics_by_hook_t* m,
event->chmod.new = mode;
event->chmod.old = old_mode;

__submit_event(event, m, FILE_ACTIVITY_CHMOD, filename, inode, path_hooks_support_bpf_d_path);
__submit_event(event, m, FILE_ACTIVITY_CHMOD, filename, inode, parent_inode, path_hooks_support_bpf_d_path);
}

__always_inline static void submit_ownership_event(struct metrics_by_hook_t* m,
const char filename[PATH_MAX],
inode_key_t* inode,
inode_key_t* parent_inode,
unsigned long long uid,
unsigned long long gid,
unsigned long long old_uid,
Expand All @@ -103,14 +109,15 @@ __always_inline static void submit_ownership_event(struct metrics_by_hook_t* m,
event->chown.old.uid = old_uid;
event->chown.old.gid = old_gid;

__submit_event(event, m, FILE_ACTIVITY_CHOWN, filename, inode, path_hooks_support_bpf_d_path);
__submit_event(event, m, FILE_ACTIVITY_CHOWN, filename, inode, parent_inode, path_hooks_support_bpf_d_path);
}

__always_inline static void submit_rename_event(struct metrics_by_hook_t* m,
const char new_filename[PATH_MAX],
const char old_filename[PATH_MAX],
inode_key_t* new_inode,
inode_key_t* old_inode) {
inode_key_t* old_inode,
inode_key_t* new_parent_inode) {
struct event_t* event = bpf_ringbuf_reserve(&rb, sizeof(struct event_t), 0);
if (event == NULL) {
m->ringbuffer_full++;
Expand All @@ -120,5 +127,5 @@ __always_inline static void submit_rename_event(struct metrics_by_hook_t* m,
bpf_probe_read_str(event->rename.old_filename, PATH_MAX, old_filename);
inode_copy_or_reset(&event->rename.old_inode, old_inode);

__submit_event(event, m, FILE_ACTIVITY_RENAME, new_filename, new_inode, path_hooks_support_bpf_d_path);
__submit_event(event, m, FILE_ACTIVITY_RENAME, new_filename, new_inode, new_parent_inode, path_hooks_support_bpf_d_path);
}
16 changes: 12 additions & 4 deletions fact-ebpf/src/bpf/inode.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,21 @@ __always_inline static inode_key_t inode_to_key(struct inode* inode) {
return key;
}

unsigned long magic = inode->i_sb->s_magic;
unsigned long magic = BPF_CORE_READ(inode, i_sb, s_magic);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

inode_to_key needs to be able to use untrusted pointers, so that it can get the key for a parent inode. To get it to be able to use untrusted pointers, BPF_CORE_READ is used.

switch (magic) {
case BTRFS_SUPER_MAGIC:
if (bpf_core_type_exists(struct btrfs_inode)) {
struct btrfs_inode* btrfs_inode = container_of(inode, struct btrfs_inode, vfs_inode);
key.inode = inode->i_ino;
key.inode = BPF_CORE_READ(inode, i_ino);
key.dev = BPF_CORE_READ(btrfs_inode, root, anon_dev);
break;
}
// If the btrfs_inode does not exist, most likely it is not
// supported on the system. Fallback to the generic implementation
// just in case.
default:
key.inode = inode->i_ino;
key.dev = inode->i_sb->s_dev;
key.inode = BPF_CORE_READ(inode, i_ino);
key.dev = BPF_CORE_READ(inode, i_sb, s_dev);
break;
}

Expand All @@ -65,6 +65,14 @@ __always_inline static inode_value_t* inode_get(struct inode_key_t* inode) {
return bpf_map_lookup_elem(&inode_map, inode);
}

__always_inline static long inode_add(struct inode_key_t* inode) {
if (inode == NULL) {
return -1;
}
inode_value_t value = 0;
return bpf_map_update_elem(&inode_map, inode, &value, BPF_ANY);
}

__always_inline static long inode_remove(struct inode_key_t* inode) {
if (inode == NULL) {
return 0;
Expand Down
23 changes: 20 additions & 3 deletions fact-ebpf/src/bpf/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,24 @@ int BPF_PROG(trace_file_open, struct file* file) {
inode_key_t inode_key = inode_to_key(file->f_inode);
inode_key_t* inode_to_submit = &inode_key;

// Extract parent inode
struct dentry* parent_dentry = BPF_CORE_READ(file, f_path.dentry, d_parent);
struct inode* parent_inode_ptr = parent_dentry ? BPF_CORE_READ(parent_dentry, d_inode) : NULL;
inode_key_t parent_key = inode_to_key(parent_inode_ptr);

// For file creation events, check if the parent directory is being
// monitored. If so, add the new file's inode to the tracked set.
if (event_type == FILE_ACTIVITY_CREATION) {
if (inode_is_monitored(inode_get(&parent_key)) == MONITORED) {
inode_add(&inode_key);
}
}
Comment on lines +55 to +61
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we move some of this logic to is_monitored? We can change it to return an enum with values like MONITORED, NOT_MONITORED and PARENT_MONITORED, then we can decide what to do based on that returned value (i.e, add the inode if the event is creation and the parent is monitored).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe. I tried this locally. It seemed to involve a lot of changes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My initial attempt didn't work. I might try again.


if (!is_monitored(inode_key, path, &inode_to_submit)) {
goto ignored;
}

submit_open_event(&m->file_open, event_type, path->path, inode_to_submit);
submit_open_event(&m->file_open, event_type, path->path, inode_to_submit, &parent_key);

return 0;

Expand Down Expand Up @@ -86,7 +99,8 @@ int BPF_PROG(trace_path_unlink, struct path* dir, struct dentry* dentry) {

submit_unlink_event(&m->path_unlink,
path->path,
inode_to_submit);
inode_to_submit,
NULL);
return 0;
}

Expand Down Expand Up @@ -118,6 +132,7 @@ int BPF_PROG(trace_path_chmod, struct path* path, umode_t mode) {
submit_mode_event(&m->path_chmod,
bound_path->path,
inode_to_submit,
NULL,
mode,
old_mode);

Expand Down Expand Up @@ -158,6 +173,7 @@ int BPF_PROG(trace_path_chown, struct path* path, unsigned long long uid, unsign
submit_ownership_event(&m->path_chown,
bound_path->path,
inode_to_submit,
NULL,
uid,
gid,
old_uid,
Expand Down Expand Up @@ -207,7 +223,8 @@ int BPF_PROG(trace_path_rename, struct path* old_dir,
new_path->path,
old_path->path,
old_inode_submit,
new_inode_submit);
new_inode_submit,
NULL);
return 0;

error:
Expand Down
1 change: 1 addition & 0 deletions fact-ebpf/src/bpf/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ struct event_t {
process_t process;
char filename[PATH_MAX];
inode_key_t inode;
inode_key_t parent_inode;
file_activity_type_t type;
union {
struct {
Expand Down
33 changes: 29 additions & 4 deletions fact/src/event/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ impl Event {
filename,
host_file,
inode: Default::default(),
parent_inode: Default::default(),
};
let file = match data {
EventTestData::Creation => FileData::Creation(inner),
Expand Down Expand Up @@ -125,6 +126,10 @@ impl Event {
})
}

pub fn is_creation(&self) -> bool {
matches!(self.file, FileData::Creation(_))
}

/// Unwrap the inner FileData and return the inode that triggered
/// the event.
///
Expand All @@ -141,6 +146,18 @@ impl Event {
}
}

/// Get the parent inode for the file in this event.
pub fn get_parent_inode(&self) -> &inode_key_t {
match &self.file {
FileData::Open(data) => &data.parent_inode,
FileData::Creation(data) => &data.parent_inode,
FileData::Unlink(data) => &data.parent_inode,
FileData::Chmod(data) => &data.inner.parent_inode,
FileData::Chown(data) => &data.inner.parent_inode,
FileData::Rename(data) => &data.new.parent_inode,
}
}

/// Same as `get_inode` but returning the 'old' inode for operations
/// like rename. For operations that involve a single inode, `None`
/// will be returned.
Expand All @@ -151,7 +168,7 @@ impl Event {
}
}

fn get_filename(&self) -> &PathBuf {
pub fn get_filename(&self) -> &PathBuf {
match &self.file {
FileData::Open(data) => &data.filename,
FileData::Creation(data) => &data.filename,
Expand Down Expand Up @@ -233,6 +250,7 @@ impl TryFrom<&event_t> for Event {
value.type_,
value.filename,
value.inode,
value.parent_inode,
value.__bindgen_anon_1,
)?;

Expand Down Expand Up @@ -282,9 +300,10 @@ impl FileData {
event_type: file_activity_type_t,
filename: [c_char; PATH_MAX as usize],
inode: inode_key_t,
parent_inode: inode_key_t,
extra_data: fact_ebpf::event_t__bindgen_ty_1,
) -> anyhow::Result<Self> {
let inner = BaseFileData::new(filename, inode)?;
let inner = BaseFileData::new(filename, inode, parent_inode)?;
let file = match event_type {
file_activity_type_t::FILE_ACTIVITY_OPEN => FileData::Open(inner),
file_activity_type_t::FILE_ACTIVITY_CREATION => FileData::Creation(inner),
Expand Down Expand Up @@ -312,7 +331,7 @@ impl FileData {
let old_inode = unsafe { extra_data.rename.old_inode };
let data = RenameFileData {
new: inner,
old: BaseFileData::new(old_filename, old_inode)?,
old: BaseFileData::new(old_filename, old_inode, Default::default())?,
};
FileData::Rename(data)
}
Expand Down Expand Up @@ -376,14 +395,20 @@ pub struct BaseFileData {
pub filename: PathBuf,
host_file: PathBuf,
inode: inode_key_t,
parent_inode: inode_key_t,
}

impl BaseFileData {
pub fn new(filename: [c_char; PATH_MAX as usize], inode: inode_key_t) -> anyhow::Result<Self> {
pub fn new(
filename: [c_char; PATH_MAX as usize],
inode: inode_key_t,
parent_inode: inode_key_t,
) -> anyhow::Result<Self> {
Ok(BaseFileData {
filename: sanitize_d_path(&filename),
host_file: PathBuf::new(), // this field is set by HostScanner
inode,
parent_inode,
})
}
}
Expand Down
57 changes: 53 additions & 4 deletions fact/src/host_scanner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,11 @@ impl HostScanner {
self.update_entry(path.as_path()).with_context(|| {
format!("Failed to update entry for {}", path.display())
})?;
} else if path.is_dir() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is slightly related to a conversation I've been having with @Stringy. IMO, in the case of finding a directory that matches a glob pattern, we should recursively scan into it, the reason being that tracking the directory itself isn't of much value if the pattern doesn't hit any files inside the directory.

For now we can leave it like this, but I think we need to take a close look at what users might expect when configuring different types of glob patterns (i.e do they want that strict set of patterns to be monitored, or are they expressing a set of paths that work as the base to be monitored?)

self.metrics.scan_inc(ScanLabels::DirectoryScanned);
self.update_entry(path.as_path()).with_context(|| {
format!("Failed to update entry for {}", path.display())
})?;
} else {
self.metrics.scan_inc(ScanLabels::FsItemIgnored);
}
Expand All @@ -154,17 +159,26 @@ impl HostScanner {
dev: metadata.st_dev(),
};

let host_path = host_info::remove_host_mount(path);
self.update_entry_with_inode(&inode, host_path)?;

debug!("Added entry for {}: {inode:?}", path.display());
Ok(())
}

/// Similar to update_entry except we are are directly using the inode instead of the path.
fn update_entry_with_inode(&self, inode: &inode_key_t, path: PathBuf) -> anyhow::Result<()> {
self.kernel_inode_map
.borrow_mut()
.insert(inode, 0, 0)
.insert(*inode, 0, 0)
.with_context(|| format!("Failed to insert kernel entry for {}", path.display()))?;

let mut inode_map = self.inode_map.borrow_mut();
let entry = inode_map.entry(inode).or_default();
*entry = host_info::remove_host_mount(path);
let entry = inode_map.entry(*inode).or_default();
*entry = path;

self.metrics.scan_inc(ScanLabels::FileUpdated);

debug!("Added entry for {}: {inode:?}", path.display());
Ok(())
}

Expand All @@ -178,6 +192,34 @@ impl HostScanner {
self.inode_map.borrow().get(inode?).cloned()
}

/// Handle file creation events by adding new inodes to the map.
///
/// We use the parent inode provided by the eBPF code
/// to look up the parent directory's host path, then construct the full
/// path by appending the new file's name.
fn handle_creation_event(&self, event: &Event) -> anyhow::Result<()> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm hoping this is still WIP and the debug statements are for testing, we really don't need a debug statement on each return condition, particularly not in the hot path where monitoring sets of files mildly noisy will just flood the logs and make it hard to see what is actually going on.

Please cut down on the debug statements, if something is expected to happen, don't put a statement there.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried removing all the debug statements in this method and managed to boil it down from 50+ lines to just this:

        let inode = event.get_inode();
        let parent_inode = event.get_parent_inode();
        if self.get_host_path(Some(inode)).is_some() || parent_inode.empty() {
            return Ok(());
        }

        if let Some(filename) = event.get_filename().file_name()
            && let Some(parent_host_path) = self.get_host_path(Some(parent_inode))
        {
            let host_path = parent_host_path.join(filename);
            self.update_entry_with_inode(inode, host_path)
                .with_context(|| {
                    format!(
                        "Failed to add creation event entry for {}",
                        filename.display()
                    )
                })?;
        }

        Ok(())

Given this is a lot more compact and easy to ready and the additional verbosity in the logs the implementation in the PR doesn't bring much of a benefit IMO, I would propose we try to move the implementation closer to this.

let inode = event.get_inode();
let parent_inode = event.get_parent_inode();
if self.get_host_path(Some(inode)).is_some() || parent_inode.empty() {
return Ok(());
}

if let Some(filename) = event.get_filename().file_name() {
if let Some(parent_host_path) = self.get_host_path(Some(parent_inode)) {
let host_path = parent_host_path.join(filename);
self.update_entry_with_inode(inode, host_path)
.with_context(|| {
format!(
"Failed to add creation event entry for {}",
filename.display()
)
})?;
}
}

Ok(())
}

/// Periodically notify the host scanner main task that a scan needs
/// to happen.
///
Expand Down Expand Up @@ -219,6 +261,13 @@ impl HostScanner {
};
self.metrics.events.added();

// Handle file creation events by adding new inodes to the map
if event.is_creation() {
if let Err(e) = self.handle_creation_event(&event) {
warn!("Failed to handle creation event: {e}");
}
}

if let Some(host_path) = self.get_host_path(Some(event.get_inode())) {
self.metrics.scan_inc(ScanLabels::InodeHit);
event.set_host_path(host_path);
Expand Down
Loading
Loading