diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6a92e7c --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +target/ +.DS_Store +data/*.tsv diff --git a/Cargo.lock b/Cargo.lock index f508675..c22aa2a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -81,6 +81,7 @@ dependencies = [ "fuser", "libc", "log", + "serde_json", ] [[package]] @@ -127,6 +128,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + [[package]] name = "jiff" version = "0.2.24" @@ -253,6 +260,15 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -273,6 +289,19 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + [[package]] name = "smallvec" version = "1.15.1" @@ -359,3 +388,9 @@ dependencies = [ "quote", "syn", ] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml index 8971171..16dd66d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,3 +8,4 @@ fuser = "0.14" libc = "0.2" env_logger = "0.11" log = "0.4" +serde_json = "1" diff --git a/README.md b/README.md index adb27a3..519ceeb 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,83 @@ # CS-FileSystem -User Space File System to populate Cybershuttle Data Sources +This is a user-space filesystem for exposing Cybershuttle data sources. -sudo apt install cargo +This version loads ATLAS metadata from a TSV file and exposes each protein entry +as a directory containing a `metadata.json` file. + +```text +/tmp/atlas_mount/ + atlas/ + 1r6w_A/ + metadata.json + 2y44_A/ + metadata.json +``` + +## Requirements +### Linux + +```bash +sudo apt install cargo sudo apt install -y libfuse3-dev libfuse-dev pkg-config +``` + +### macOS + +```bash +brew install pkgconf +brew install --cask macfuse +``` + +macFUSE may require approval in `System Settings -> Privacy & Security`. + +## ATLAS TSV + +Place the ATLAS metadata TSV somewhere local. The examples below assume the TSV is at: + +```text +data/2024_11_18_ATLAS_info.tsv +``` + +The TSV is not committed to the repository. Create the `data` directory and copy +or download the file there: + +```bash +mkdir -p data +cp /path/to/2024_11_18_ATLAS_info.tsv data/ +``` + +## Run + +Build and mount the filesystem: + +```bash +mkdir -p /tmp/atlas_mount +cargo run --release -- data/2024_11_18_ATLAS_info.tsv /tmp/atlas_mount +``` + +Leave that command running while the filesystem is mounted. + +In another terminal: -cargo build +```bash +ls /tmp/atlas_mount +ls /tmp/atlas_mount/atlas | head +ls /tmp/atlas_mount/atlas/1r6w_A +cat /tmp/atlas_mount/atlas/1r6w_A/metadata.json +``` -mkdir /tmp/myfs -cargo run --release -- /tmp/myfs +## Unmount +Linux: -In a different terminal -ls /tmp/myfs +```bash +fusermount -u /tmp/atlas_mount +``` +macOS: -To unmount -fusermount -u /tmp/myfs +```bash +diskutil unmount /tmp/atlas_mount +``` diff --git a/src/atlas.rs b/src/atlas.rs new file mode 100644 index 0000000..e775d29 --- /dev/null +++ b/src/atlas.rs @@ -0,0 +1,385 @@ +use std::collections::HashMap; +use std::ffi::OsStr; +use std::fs; +use std::io::{BufRead, BufReader}; + +use fuser::{FileAttr, FileType, ReplyDirectory}; + +use crate::{directory_attr, regular_file_attr, InodeGenerator, VirtualDataSource}; + +const METADATA_FILE_NAME: &str = "metadata.json"; + +#[derive(Clone, Debug)] +pub struct AtlasEntry { + pub id: String, + pub metadata_json: String, +} + +#[derive(Clone, Debug)] +struct AtlasEntryNode { + metadata_inode: u64, + id: String, +} + +pub struct AtlasDataSource { + inode: u64, + entry_dirs: HashMap, + entry_name_to_inode: HashMap, + file_contents: HashMap, +} + +impl AtlasDataSource { + pub fn new(entries: Vec, inode_gen: &mut InodeGenerator) -> Self { + let inode = inode_gen.next(); + let mut entry_dirs = HashMap::new(); + let mut entry_name_to_inode = HashMap::new(); + let mut file_contents = HashMap::new(); + + for entry in entries { + let entry_inode = inode_gen.next(); + let metadata_inode = inode_gen.next(); + + entry_name_to_inode.insert(entry.id.clone(), entry_inode); + file_contents.insert(metadata_inode, entry.metadata_json); + + entry_dirs.insert( + entry_inode, + AtlasEntryNode { + metadata_inode, + id: entry.id, + }, + ); + } + + AtlasDataSource { + inode, + entry_dirs, + entry_name_to_inode, + file_contents, + } + } + + pub fn entry_count(&self) -> usize { + self.entry_dirs.len() + } + + fn metadata_attr(&self, metadata_inode: u64) -> Option { + self.file_contents + .get(&metadata_inode) + .map(|content| regular_file_attr(metadata_inode, content.len() as u64)) + } + + fn read_metadata(&self, inode: u64, offset: i64, size: u32) -> Option> { + let content = self.file_contents.get(&inode)?; + let data = content.as_bytes(); + let start = (offset as usize).min(data.len()); + let end = (start + size as usize).min(data.len()); + Some(data[start..end].to_vec()) + } + + fn entries_for_readdir(&self, ino: u64) -> Option> { + if ino == self.inode { + let mut entries = vec![ + (self.inode, FileType::Directory, ".".to_string()), + (self.inode, FileType::Directory, "..".to_string()), + ]; + + let mut entry_dirs: Vec<_> = self.entry_dirs.iter().collect(); + entry_dirs.sort_by(|(_, left), (_, right)| left.id.cmp(&right.id)); + + for (entry_inode, node) in entry_dirs { + entries.push((*entry_inode, FileType::Directory, node.id.clone())); + } + + return Some(entries); + } + + let node = self.entry_dirs.get(&ino)?; + Some(vec![ + (ino, FileType::Directory, ".".to_string()), + (self.inode, FileType::Directory, "..".to_string()), + ( + node.metadata_inode, + FileType::RegularFile, + METADATA_FILE_NAME.to_string(), + ), + ]) + } +} + +pub fn parse_atlas_tsv(tsv_path: &str) -> Vec { + let file = fs::File::open(tsv_path).expect("Failed to open TSV"); + parse_atlas_reader(BufReader::new(file)) +} + +fn parse_atlas_reader(reader: R) -> Vec { + let mut lines = reader.lines(); + let header_line = lines + .next() + .expect("ATLAS TSV is missing a header row") + .expect("Failed to read ATLAS TSV header"); + let headers: Vec = header_line.split('\t').map(str::to_string).collect(); + let mut entries = Vec::new(); + + for line in lines { + let line = line.expect("Failed to read ATLAS TSV row"); + if line.trim().is_empty() { + continue; + } + + let fields: Vec<&str> = line.split('\t').collect(); + let mut map = serde_json::Map::new(); + + for (i, header) in headers.iter().enumerate() { + if let Some(value) = fields.get(i) { + map.insert( + header.to_string(), + serde_json::Value::String(value.to_string()), + ); + } + } + + let id = fields.get(0).unwrap_or(&"unknown").to_string(); + let metadata_json = serde_json::to_string_pretty(&serde_json::Value::Object(map)).unwrap(); + entries.push(AtlasEntry { id, metadata_json }); + } + + entries +} + +pub fn load_atlas_datasource(tsv_path: &str, inode_gen: &mut InodeGenerator) -> AtlasDataSource { + AtlasDataSource::new(parse_atlas_tsv(tsv_path), inode_gen) +} + +impl VirtualDataSource for AtlasDataSource { + fn name(&self) -> &str { + "atlas" + } + + fn inode(&self) -> u64 { + self.inode + } + + fn lookup(&self, parent: u64, name: &OsStr) -> Option { + if parent == self.inode { + let entry_name = name.to_str()?; + let entry_inode = self.entry_name_to_inode.get(entry_name)?; + return Some(directory_attr(*entry_inode)); + } + + let node = self.entry_dirs.get(&parent)?; + if name.to_str() == Some(METADATA_FILE_NAME) { + return self.metadata_attr(node.metadata_inode); + } + + None + } + + fn getattr(&self, ino: u64) -> Option { + if ino == self.inode || self.entry_dirs.contains_key(&ino) { + return Some(directory_attr(ino)); + } + + self.metadata_attr(ino) + } + + fn read(&self, ino: u64, offset: i64, size: u32) -> Option> { + self.read_metadata(ino, offset, size) + } + + fn readdir(&self, ino: u64, offset: i64, reply: &mut ReplyDirectory) -> bool { + let Some(entries) = self.entries_for_readdir(ino) else { + return false; + }; + + for (i, entry) in entries.iter().enumerate().skip(offset as usize) { + if reply.add(entry.0, (i + 1) as i64, entry.1, entry.2.as_str()) { + break; + } + } + + true + } +} + +#[cfg(test)] +mod tests { + use std::io::Cursor; + + use fuser::FileType; + + use super::*; + + fn sample_entries() -> Vec { + parse_atlas_reader(Cursor::new( + "PDB\tlength\tprotein_name\n1r6w_A\t322\to-succinylbenzoate synthase\n2y44_A\t184\tAlanine-rich surface protein\n", + )) + } + + fn sample_datasource() -> AtlasDataSource { + let mut inode_gen = InodeGenerator::new(); + AtlasDataSource::new(sample_entries(), &mut inode_gen) + } + + #[test] + fn parses_tsv_rows_into_pretty_json_entries() { + let entries = sample_entries(); + + assert_eq!(entries.len(), 2); + assert_eq!(entries[0].id, "1r6w_A"); + + let metadata: serde_json::Value = serde_json::from_str(&entries[0].metadata_json).unwrap(); + assert_eq!(metadata["PDB"], "1r6w_A"); + assert_eq!(metadata["length"], "322"); + assert_eq!(metadata["protein_name"], "o-succinylbenzoate synthase"); + } + + #[test] + fn skips_blank_tsv_rows() { + let entries = parse_atlas_reader(Cursor::new("PDB\tlength\n\n1r6w_A\t322\n\n")); + + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].id, "1r6w_A"); + } + + #[test] + fn allocates_predictable_inodes_for_atlas_tree() { + let ds = sample_datasource(); + + assert_eq!(ds.inode(), 4); + assert_eq!(ds.entry_count(), 2); + assert_eq!(ds.entry_name_to_inode["1r6w_A"], 5); + assert_eq!(ds.entry_dirs[&5].metadata_inode, 6); + assert_eq!(ds.entry_name_to_inode["2y44_A"], 7); + assert_eq!(ds.entry_dirs[&7].metadata_inode, 8); + } + + #[test] + fn lookup_finds_entry_directory_under_atlas_root() { + let ds = sample_datasource(); + let attr = ds.lookup(ds.inode(), OsStr::new("1r6w_A")).unwrap(); + + assert_eq!(attr.ino, 5); + assert_eq!(attr.kind, FileType::Directory); + assert_eq!(attr.perm, 0o755); + } + + #[test] + fn lookup_finds_metadata_file_inside_entry_directory() { + let ds = sample_datasource(); + let entry_inode = ds.entry_name_to_inode["1r6w_A"]; + let attr = ds + .lookup(entry_inode, OsStr::new(METADATA_FILE_NAME)) + .unwrap(); + + assert_eq!(attr.ino, ds.entry_dirs[&entry_inode].metadata_inode); + assert_eq!(attr.kind, FileType::RegularFile); + assert!(attr.size > 0); + } + + #[test] + fn lookup_rejects_unknown_entry_and_unknown_file() { + let ds = sample_datasource(); + let entry_inode = ds.entry_name_to_inode["1r6w_A"]; + + assert!(ds.lookup(ds.inode(), OsStr::new("missing")).is_none()); + assert!(ds + .lookup(entry_inode, OsStr::new("not_metadata.json")) + .is_none()); + } + + #[test] + fn getattr_reports_atlas_entry_and_metadata_attrs() { + let ds = sample_datasource(); + let entry_inode = ds.entry_name_to_inode["1r6w_A"]; + let metadata_inode = ds.entry_dirs[&entry_inode].metadata_inode; + + assert_eq!(ds.getattr(ds.inode()).unwrap().kind, FileType::Directory); + assert_eq!(ds.getattr(entry_inode).unwrap().kind, FileType::Directory); + + let metadata_attr = ds.getattr(metadata_inode).unwrap(); + assert_eq!(metadata_attr.kind, FileType::RegularFile); + assert_eq!( + metadata_attr.size, + ds.file_contents[&metadata_inode].len() as u64 + ); + } + + #[test] + fn read_returns_sliced_metadata_bytes() { + let ds = sample_datasource(); + let entry_inode = ds.entry_name_to_inode["1r6w_A"]; + let metadata_inode = ds.entry_dirs[&entry_inode].metadata_inode; + let full_content = ds.file_contents[&metadata_inode].clone(); + + assert_eq!( + String::from_utf8(ds.read(metadata_inode, 0, 20).unwrap()).unwrap(), + full_content[..20].to_string() + ); + assert_eq!( + String::from_utf8(ds.read(metadata_inode, 5, 10).unwrap()).unwrap(), + full_content[5..15].to_string() + ); + assert_eq!( + ds.read(metadata_inode, 99_999, 10).unwrap(), + Vec::::new() + ); + } + + #[test] + fn read_rejects_directory_inodes_and_unknown_inodes() { + let ds = sample_datasource(); + + assert!(ds.read(ds.inode(), 0, 10).is_none()); + assert!(ds.read(999, 0, 10).is_none()); + } + + #[test] + fn root_listing_contains_sorted_entry_directories() { + let ds = sample_datasource(); + let entries = ds.entries_for_readdir(ds.inode()).unwrap(); + + assert_eq!( + entries[0], + (ds.inode(), FileType::Directory, ".".to_string()) + ); + assert_eq!( + entries[1], + (ds.inode(), FileType::Directory, "..".to_string()) + ); + assert_eq!(entries[2], (5, FileType::Directory, "1r6w_A".to_string())); + assert_eq!(entries[3], (7, FileType::Directory, "2y44_A".to_string())); + } + + #[test] + fn entry_listing_contains_metadata_json() { + let ds = sample_datasource(); + let entry_inode = ds.entry_name_to_inode["1r6w_A"]; + let metadata_inode = ds.entry_dirs[&entry_inode].metadata_inode; + let entries = ds.entries_for_readdir(entry_inode).unwrap(); + + assert_eq!( + entries[0], + (entry_inode, FileType::Directory, ".".to_string()) + ); + assert_eq!( + entries[1], + (ds.inode(), FileType::Directory, "..".to_string()) + ); + assert_eq!( + entries[2], + ( + metadata_inode, + FileType::RegularFile, + METADATA_FILE_NAME.to_string() + ) + ); + } + + #[test] + fn listing_rejects_unknown_inode() { + let ds = sample_datasource(); + + assert!(ds.entries_for_readdir(999).is_none()); + } +} diff --git a/src/main.rs b/src/main.rs index 10ae177..f196c78 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,13 +1,9 @@ -// Simple in-memory FUSE filesystem using the `fuser` crate. +// Simple read-only FUSE filesystem using the `fuser` crate. // -// Exposes a single read-only directory containing one file: -// /hello.txt -> "Hello, FUSE from Rust!\n" -// -// Usage: -// mkdir /tmp/myfs -// cargo run --release -- /tmp/myfs -// cat /tmp/myfs/hello.txt -// fusermount -u /tmp/myfs (Ctrl-C also unmounts) +// Top-level directories are implemented by pluggable data sources. The first +// data source is ATLAS, which serves one metadata.json file per protein entry. + +mod atlas; use std::ffi::OsStr; use std::time::{Duration, UNIX_EPOCH}; @@ -18,260 +14,40 @@ use fuser::{ }; use libc::ENOENT; -struct CSFile { - inode_no: u64, - cs_data_id: String, - name: String, -} - -struct CSDirectory { - inode_no: u64, - name: String, - cs_data_id: String, - files: Vec, - directories: Vec, -} - -struct DataSource { - inode_no: u64, - name: String, - directories: Vec, - files: Vec, -} - -fn lookup_inode_in_directory(dir: &CSDirectory, inode_no: u64) -> Option { - if inode_no == dir.inode_no { - return Some(FileAttr { - ino: dir.inode_no, - size: 0, - blocks: 0, - atime: UNIX_EPOCH, - mtime: UNIX_EPOCH, - ctime: UNIX_EPOCH, - crtime: UNIX_EPOCH, - kind: FileType::Directory, - perm: 0o755, - nlink: 2, - uid: unsafe { libc::getuid() }, - gid: unsafe { libc::getgid() }, - rdev: 0, - flags: 0, - blksize: 512, - }); - } - - for file in &dir.files { - if inode_no == file.inode_no { - return Some(FileAttr { - ino: file.inode_no, - size: 0, - blocks: 0, - atime: UNIX_EPOCH, - mtime: UNIX_EPOCH, - ctime: UNIX_EPOCH, - crtime: UNIX_EPOCH, - kind: FileType::RegularFile, - perm: 0o644, - nlink: 1, - uid: unsafe { libc::getuid() }, - gid: unsafe { libc::getgid() }, - rdev: 0, - flags: 0, - blksize: 512, - }); - } - } - - for subdir in &dir.directories { - if let Some(attr) = lookup_inode_in_directory(subdir, inode_no) { - return Some(attr); - } - } - - None -} - -fn lookup_attr_in_directory(dir: &CSDirectory, name: &OsStr, parent_inode: u64) -> Option { - - for file in &dir.files { - if parent_inode == dir.inode_no && name.to_str() == Some(file.name.as_str()) { - return Some(FileAttr { - ino: file.inode_no, - size: 0, - blocks: 0, - atime: UNIX_EPOCH, - mtime: UNIX_EPOCH, - ctime: UNIX_EPOCH, - crtime: UNIX_EPOCH, - kind: FileType::RegularFile, - perm: 0o644, - nlink: 1, - uid: unsafe { libc::getuid() }, - gid: unsafe { libc::getgid() }, - rdev: 0, - flags: 0, - blksize: 512, - }); - } - } - - for subdir in &dir.directories { - if parent_inode == dir.inode_no && name.to_str() == Some(subdir.name.as_str()) { - return Some(FileAttr { - ino: subdir.inode_no, - size: 0, - blocks: 0, - atime: UNIX_EPOCH, - mtime: UNIX_EPOCH, - ctime: UNIX_EPOCH, - crtime: UNIX_EPOCH, - kind: FileType::Directory, - perm: 0o755, - nlink: 2, - uid: unsafe { libc::getuid() }, - gid: unsafe { libc::getgid() }, - rdev: 0, - flags: 0, - blksize: 512, - }); - } - } - - for subdir in &dir.directories { - if let Some(attr) = lookup_attr_in_directory(subdir, name, parent_inode) { - return Some(attr); - } - } - - - None -} - - -impl DataSource { - - fn new(inode_no: u64, name: String, directories: Vec, files: Vec) -> Self { - DataSource { inode_no, name, directories, files } - } - - fn get_attr(&self, ) -> FileAttr { - FileAttr { - ino: self.inode_no, - size: 0, - blocks: 0, - atime: UNIX_EPOCH, - mtime: UNIX_EPOCH, - ctime: UNIX_EPOCH, - crtime: UNIX_EPOCH, - kind: FileType::Directory, - perm: 0o755, - nlink: 2, - uid: unsafe { libc::getuid() }, - gid: unsafe { libc::getgid() }, - rdev: 0, - flags: 0, - blksize: 512, - } - } - - fn lookup_by_inode(&self, inode_no: u64) -> Option { - if inode_no == self.inode_no { - return Some(self.get_attr()); - } - - for file in &self.files { - if inode_no == file.inode_no { - return Some(FileAttr { - ino: file.inode_no, - size: 0, - blocks: 0, - atime: UNIX_EPOCH, - mtime: UNIX_EPOCH, - ctime: UNIX_EPOCH, - crtime: UNIX_EPOCH, - kind: FileType::RegularFile, - perm: 0o644, - nlink: 1, - uid: unsafe { libc::getuid() }, - gid: unsafe { libc::getgid() }, - rdev: 0, - flags: 0, - blksize: 512, - }); - } - } - - for dir in &self.directories { - if let Some(attr) = lookup_inode_in_directory(dir, inode_no) { - return Some(attr); - } - } - None - } - +const TTL: Duration = Duration::from_secs(1); +const ROOT_INO: u64 = 1; +const HELLO_INO: u64 = 2; +const HELLO_CONTENT: &str = "Hello, FUSE from Rust!\n"; - fn lookup_by_name(&self, parent_inode: u64, name: &OsStr) -> Option { - for file in &self.files { - if parent_inode == self.inode_no && name.to_str() == Some(file.name.as_str()) { - return Some(FileAttr { - ino: file.inode_no, - size: 0, - blocks: 0, - atime: UNIX_EPOCH, - mtime: UNIX_EPOCH, - ctime: UNIX_EPOCH, - crtime: UNIX_EPOCH, - kind: FileType::RegularFile, - perm: 0o644, - nlink: 1, - uid: unsafe { libc::getuid() }, - gid: unsafe { libc::getgid() }, - rdev: 0, - flags: 0, - blksize: 512, - }); - } - } - - for dir in &self.directories { - if let Some(attr) = lookup_attr_in_directory(dir, name, parent_inode) { - return Some(attr); - } - } - None - } +pub trait VirtualDataSource { + fn name(&self) -> &str; + fn inode(&self) -> u64; + fn lookup(&self, parent: u64, name: &OsStr) -> Option; + fn getattr(&self, ino: u64) -> Option; + fn read(&self, ino: u64, offset: i64, size: u32) -> Option>; + fn readdir(&self, ino: u64, offset: i64, reply: &mut ReplyDirectory) -> bool; } - -// implement an incrementing inode number generator -struct InodeGenerator { +pub struct InodeGenerator { current: u64, -} +} impl InodeGenerator { fn new() -> Self { - InodeGenerator { current: 4 } // Start from 4 since 1, 2, and 3 are already used + InodeGenerator { current: 4 } } - fn next(&mut self) -> u64 { + pub fn next(&mut self) -> u64 { let inode = self.current; self.current += 1; inode } } - -const TTL: Duration = Duration::from_secs(1); - -const HELLO_CONTENT: &str = "Hello, FUSE from Rust!\n"; - -const ROOT_INO: u64 = 1; -const HELLO_INO: u64 = 2; - -fn root_attr() -> FileAttr { +pub fn directory_attr(ino: u64) -> FileAttr { FileAttr { - ino: ROOT_INO, + ino, size: 0, blocks: 0, atime: UNIX_EPOCH, @@ -289,10 +65,10 @@ fn root_attr() -> FileAttr { } } -fn hello_attr() -> FileAttr { +pub fn regular_file_attr(ino: u64, size: u64) -> FileAttr { FileAttr { - ino: HELLO_INO, - size: HELLO_CONTENT.len() as u64, + ino, + size, blocks: 1, atime: UNIX_EPOCH, mtime: UNIX_EPOCH, @@ -308,19 +84,30 @@ fn hello_attr() -> FileAttr { blksize: 512, } } + +fn root_attr() -> FileAttr { + directory_attr(ROOT_INO) +} + +fn hello_attr() -> FileAttr { + regular_file_attr(HELLO_INO, HELLO_CONTENT.len() as u64) +} + struct CybershuttleFS { - data_sources: Vec, + data_sources: Vec>, } impl Filesystem for CybershuttleFS { fn lookup(&mut self, _req: &Request<'_>, parent: u64, name: &OsStr, reply: ReplyEntry) { - for ds in &self.data_sources { - if parent == ROOT_INO && name.to_str() == Some(ds.name.as_str()) { - reply.entry(&TTL, &ds.get_attr(), 0); - return; + if parent == ROOT_INO && name.to_str() == Some(ds.name()) { + if let Some(attr) = ds.getattr(ds.inode()) { + reply.entry(&TTL, &attr, 0); + return; + } } - if let Some(attr) = ds.lookup_by_name(parent, name) { + + if let Some(attr) = ds.lookup(parent, name) { reply.entry(&TTL, &attr, 0); return; } @@ -339,7 +126,7 @@ impl Filesystem for CybershuttleFS { HELLO_INO => reply.attr(&TTL, &hello_attr()), _ => { for ds in &self.data_sources { - if let Some(attr) = ds.lookup_by_inode(ino) { + if let Some(attr) = ds.getattr(ino) { reply.attr(&TTL, &attr); return; } @@ -360,14 +147,22 @@ impl Filesystem for CybershuttleFS { _lock_owner: Option, reply: ReplyData, ) { - if ino != HELLO_INO { - reply.error(ENOENT); + if ino == HELLO_INO { + let data = HELLO_CONTENT.as_bytes(); + let start = (offset as usize).min(data.len()); + let end = (start + size as usize).min(data.len()); + reply.data(&data[start..end]); return; } - let data = HELLO_CONTENT.as_bytes(); - let start = (offset as usize).min(data.len()); - let end = (start + size as usize).min(data.len()); - reply.data(&data[start..end]); + + for ds in &self.data_sources { + if let Some(data) = ds.read(ino, offset, size) { + reply.data(&data); + return; + } + } + + reply.error(ENOENT); } fn readdir( @@ -378,56 +173,32 @@ impl Filesystem for CybershuttleFS { offset: i64, mut reply: ReplyDirectory, ) { - if ino == ROOT_INO { - // Add the "hello.txt" entry to the root directory let mut entries = vec![ (ROOT_INO, FileType::Directory, "."), (ROOT_INO, FileType::Directory, ".."), ]; - - for ds in self.data_sources.iter() { - entries.push((ds.inode_no, FileType::Directory, ds.name.as_str())); + + for ds in &self.data_sources { + entries.push((ds.inode(), FileType::Directory, ds.name())); } for (i, entry) in entries.iter().enumerate().skip(offset as usize) { - // i + 1 is the next offset to resume from. if reply.add(entry.0, (i + 1) as i64, entry.1, entry.2) { break; } } reply.ok(); return; - } else { + } - for ds in &self.data_sources { - if let Some(attr) = ds.lookup_by_inode(ino) { - if attr.kind == FileType::Directory { - let mut entries = vec![ - (attr.ino, FileType::Directory, "."), - (ROOT_INO, FileType::Directory, ".."), - ]; - - for file in &ds.files { - entries.push((file.inode_no, FileType::RegularFile, file.name.as_str())); - } - - for dir in &ds.directories { - entries.push((dir.inode_no, FileType::Directory, dir.name.as_str())); - } - - for (i, entry) in entries.iter().enumerate().skip(offset as usize) { - // i + 1 is the next offset to resume from. - if reply.add(entry.0, (i + 1) as i64, entry.1, entry.2) { - break; - } - } - reply.ok(); - return; - } - } + for ds in &self.data_sources { + if ds.readdir(ino, offset, &mut reply) { + reply.ok(); + return; } } + reply.error(ENOENT); } } @@ -435,68 +206,30 @@ impl Filesystem for CybershuttleFS { fn main() { env_logger::init(); - let mountpoint = std::env::args_os().nth(1).unwrap_or_else(|| { - eprintln!("Usage: cs-filesystem "); + let args: Vec = std::env::args().collect(); + if args.len() < 3 { + eprintln!("Usage: cs-filesystem "); std::process::exit(1); - }); - - let options = vec![ - MountOption::RO, - MountOption::FSName("cybershuttlefs".to_string()), - MountOption::AutoUnmount, - MountOption::AllowOther, - ]; + } + let tsv_path = &args[1]; + let mountpoint = &args[2]; let mut inode_gen = InodeGenerator::new(); + let atlas_ds = atlas::load_atlas_datasource(tsv_path, &mut inode_gen); + println!("Loaded {} ATLAS entries", atlas_ds.entry_count()); - let alp_dirs = vec![ - CSDirectory { - inode_no: inode_gen.next(), - name: "pdb".to_string(), - cs_data_id: "alphafold_pdb".to_string(), - files: vec![], - directories: vec![], - }, - CSDirectory { - inode_no: inode_gen.next(), - name: "fasta".to_string(), - cs_data_id: "alphafold_fasta".to_string(), - files: vec![], - directories: vec![], - }, - ]; - - - let alp_files = vec![ - CSFile { - inode_no: inode_gen.next(), - cs_data_id: "alphafold_summary".to_string(), - name: "summary.txt".to_string(), - }, - ]; - - - let protein_data_dirs = vec![ - CSDirectory { - inode_no: inode_gen.next(), - name: "uniprot".to_string(), - cs_data_id: "protein_data_uniprot".to_string(), - files: vec![], - directories: vec![], - }, - ]; + let fs = CybershuttleFS { + data_sources: vec![Box::new(atlas_ds)], + }; - - let data_sources = vec![ - DataSource::new(inode_gen.next(), "alphafold".to_string(), alp_dirs, alp_files), - DataSource::new(inode_gen.next(), "protein_data".to_string(), protein_data_dirs, vec![]), + let options = vec![ + MountOption::RO, + MountOption::FSName("cybershuttlefs".to_string()), + MountOption::AutoUnmount, ]; - - let fs = CybershuttleFS { data_sources }; - - if let Err(e) = fuser::mount2(fs, &mountpoint, &options) { + if let Err(e) = fuser::mount2(fs, mountpoint, &options) { eprintln!("Failed to mount filesystem: {e}"); std::process::exit(1); }