From ccc8c48991be8ca85db5d59bed9a5e7e8b8a17ab Mon Sep 17 00:00:00 2001 From: LesterEvSe Date: Thu, 9 Apr 2026 18:08:00 +0300 Subject: [PATCH] feat: add linearize and build functionality to the driver --- src/driver/linearization.rs | 4 +- src/driver/mod.rs | 30 ++++ src/driver/resolve_order.rs | 313 ++++++++++++++++++++++++++++++++++++ 3 files changed, 344 insertions(+), 3 deletions(-) create mode 100644 src/driver/resolve_order.rs diff --git a/src/driver/linearization.rs b/src/driver/linearization.rs index 8b9f32a..7f8b48f 100644 --- a/src/driver/linearization.rs +++ b/src/driver/linearization.rs @@ -6,8 +6,6 @@ //! Because we do not need to enforce strict local precedence, a standard post-order //! DFS is a better option. -// TODO: Remove this once the code is actively used. -#![allow(dead_code)] use std::collections::HashSet; use std::fmt; @@ -16,7 +14,7 @@ use crate::driver::DependencyGraph; /// This is a core component of the [`DependencyGraph`]. impl DependencyGraph { /// Returns the deterministic, BOTTOM-UP load order of dependencies. - pub fn linearize(&self) -> Result, LinearizationError> { + pub(crate) fn linearize(&self) -> Result, LinearizationError> { let mut visited = HashSet::new(); let mut visiting = Vec::new(); let mut order = Vec::new(); diff --git a/src/driver/mod.rs b/src/driver/mod.rs index 34227d4..72e036e 100644 --- a/src/driver/mod.rs +++ b/src/driver/mod.rs @@ -1,4 +1,34 @@ +//! The `driver` module is responsible for module resolution and dependency management. +//! +//! Our compiler operates in a strict pipeline: `Lexer -> Parser -> Driver -> AST`. +//! While the Parser only understands a single file at a time, the Driver processes +//! multiple files, resolves their dependencies, and converts them into a unified +//! structure ready for final AST construction. +//! +//! # Architecture +//! +//! ## Dependency Graph & Linearization +//! +//! The driver parses the root file and recursively discovers all imported modules +//! 
to build a Directed Acyclic Graph (DAG) of the project's dependencies. Because +//! the final AST requires a flat array of items, the driver applies a deterministic +//! linearization strategy to this DAG. This safely flattens the multi-file project +//! into a single, logically ordered sequence, strictly enforcing visibility rules +//! and preventing duplicate imports. +//! +//! ## Project Structure & Entry Point +//! +//! SimplicityHL does not define a "project root" directory. Instead, the compiler +//! relies on a single entry point: the file passed as the first positional argument. +//! This file must contain the `main` function, which serves as the program's +//! starting point. +//! +//! External libraries are explicitly linked using the `--dep` flag. The driver +//! resolves and parses these external files relative to the entry point during +//! the dependency graph construction. + mod linearization; +mod resolve_order; use std::collections::{HashMap, HashSet, VecDeque}; use std::path::PathBuf; diff --git a/src/driver/resolve_order.rs b/src/driver/resolve_order.rs new file mode 100644 index 0000000..4525353 --- /dev/null +++ b/src/driver/resolve_order.rs @@ -0,0 +1,313 @@ +use std::collections::{BTreeSet, HashMap}; +use std::sync::Arc; + +use crate::driver::{CanonSourceFile, DependencyGraph}; +use crate::error::{Error, ErrorCollector, RichError, Span}; +use crate::impl_eq_hash; +use crate::parse::{self, Visibility}; +use crate::resolution::CanonPath; + +/// The final, flattened representation of a SimplicityHL program. +/// +/// This struct holds the fully resolved sequence of items, paths, and scope +/// resolutions, ready to be passed to the next stage of the compiler. +#[derive(Clone, Debug)] +pub struct Program { + /// The linear sequence of parsed items (functions and type aliases), in linearized load order. + items: Arc<[parse::Item]>, + + /// The files that make up this program, along with their scoping rules. 
+ files: Arc<[ResolvedFile]>, + + span: Span, +} + +impl Program { + pub fn items(&self) -> &[parse::Item] { + &self.items + } + + pub fn files(&self) -> &[ResolvedFile] { + &self.files + } + + pub fn span(&self) -> &Span { + &self.span + } +} + +impl_eq_hash!(Program; items, files); + +/// Represents a single source file alongside the set of item names resolved within its scope. +#[derive(Clone, Debug)] +pub struct ResolvedFile { + path: CanonPath, + + /// The set of resolved item names available within this file's scope. + // Use BTreeSet instead of HashMap for the impl_eq_hash! macro. + resolutions: BTreeSet>, +} + +impl ResolvedFile { + pub fn path(&self) -> &CanonPath { + &self.path + } + + pub fn resolutions(&self) -> &BTreeSet> { + &self.resolutions + } +} + +impl_eq_hash!(ResolvedFile; path, resolutions); + +/// This is a core component of the [`DependencyGraph`]. +impl DependencyGraph { + /// Linearizes the dependency graph and builds the final program: `Err` carries the linearization error text, `Ok(None)` means `handler` holds errors. + pub fn linearize_and_build( + &self, + handler: &mut ErrorCollector, + ) -> Result, String> { + match self.linearize() { + Ok(order) => Ok(self.build_program(&order, handler)), + Err(err) => Err(err.to_string()), + } + } + + /// Walks the files in `order`, collecting functions and type aliases and resolving `use` imports; returns `None` if `handler` has errors. + fn build_program(&self, order: &[usize], handler: &mut ErrorCollector) -> Option { + let mut items: Vec = Vec::new(); + let mut resolutions: Vec, Visibility>> = + vec![HashMap::new(); self.modules.len()]; + + for &file_id in order { + let module = &self.modules[file_id]; + let source = &module.source; + + for elem in module.parsed_program.items() { + // 1. 
Handle Uses (Early Continue flattens the nesting) + if let parse::Item::Use(use_decl) = elem { + let resolve_path = + match self.dependency_map.resolve_path(source.name(), use_decl) { + Ok(path) => path, + Err(err) => { + handler.push(err.with_source(source.clone())); + continue; + } + }; + + let ind = self.lookup[&resolve_path]; + let use_decl_items = match use_decl.items() { + parse::UseItems::Single(elem) => std::slice::from_ref(elem), + parse::UseItems::List(elems) => elems.as_slice(), + }; + + for item in use_decl_items { + if let Err(err) = Self::process_use_item( + &mut resolutions, + (source, file_id), + ind, + Arc::from(item.as_inner()), + use_decl, + ) { + handler.push(err); + } + } + continue; + } + + // 2. Ignore Modules + if matches!(elem, parse::Item::Module) { + continue; + } + + // 3. Handle Types & Functions + let (name, vis) = match elem { + parse::Item::TypeAlias(a) => (a.name().as_inner(), a.visibility()), + parse::Item::Function(f) => (f.name().as_inner(), f.visibility()), + _ => unreachable!(), + }; + + items.push(elem.clone()); + resolutions[file_id].insert(Arc::from(name), vis.clone()); + } + } + + if handler.has_errors() { + None + } else { + Some(Program { + items: items.into(), + files: construct_resolved_file_array(&self.paths, &resolutions), + span: *self.modules[0].parsed_program.as_ref(), + }) + } + } + + /// Processes a single imported item during the module resolution phase. + /// + /// # Arguments + /// + /// * `resolutions` - A mutable slice of hash maps, where each index corresponds to a module's ID and holds its resolved items and their visibilities. + /// * `(source, id)` - A tuple containing the [`CanonSourceFile`] for error reporting and the `usize` identifier of the destination module (the file containing the `use`). + /// * `ind` - The unique identifier (`usize`) of the source module being imported *from*. + /// * `name` - The specific item name (`Arc`) being imported from the source. + /// * `use_decl` - The AST node of the `use` statement. 
This dictates the visibility of the newly imported item in the destination module. + /// + /// # Returns + /// + /// Returns `Ok(())` on success. Returns `Err(RichError)` if: + /// * [`Error::UnresolvedItem`]: The target `name` does not exist in the source module (`ind`). + /// * [`Error::PrivateItem`]: The target exists in the source module, but its visibility is private. + fn process_use_item( + resolutions: &mut [HashMap, Visibility>], + (source, id): (&CanonSourceFile, usize), + ind: usize, + name: Arc, + use_decl: &parse::UseDecl, + ) -> Result<(), RichError> { + let build_path = || source.name().as_path().to_path_buf(); + let span = *use_decl.span(); + + let visibility = resolutions[ind].get(&name).ok_or_else(|| { + RichError::new( + Error::UnresolvedItem { + name: name.to_string(), + target_file: build_path(), + }, + span, + ) + .with_source(source.clone()) + })?; + + if matches!(visibility, parse::Visibility::Private) { + return Err(RichError::new( + Error::PrivateItem { + name: name.to_string(), + target_file: build_path(), + }, + span, + ) + .with_source(source.clone())); + } + + resolutions[id].insert(name, use_decl.visibility().clone()); + Ok(()) + } +} + +fn construct_resolved_file_array( + paths: &[CanonPath], + resolutions: &[HashMap, Visibility>], +) -> Arc<[ResolvedFile]> { + let mut result = Vec::with_capacity(paths.len()); + + for i in 0..paths.len() { + let file_resolutions: BTreeSet> = resolutions[i].keys().cloned().collect(); + + result.push(ResolvedFile { + path: paths[i].clone(), + resolutions: file_resolutions, + }); + } + + result.into() +} + +#[cfg(test)] +mod tests { + use crate::driver::tests::setup_graph; + + use super::*; + + #[test] + fn test_local_definitions_visibility() { + // main.simf defines a private function and a public function. + // Expected: Both names should appear in main's resolved scope. 
+ + let (graph, ids, _dir) = setup_graph(vec![( + "main.simf", + "fn private_fn() {} pub fn public_fn() {}", + )]); + + let mut error_handler = ErrorCollector::new(); + let program_option = graph.linearize_and_build(&mut error_handler).unwrap(); + + let Some(program) = program_option else { + panic!("{}", error_handler); + }; + + let root_id = ids["main"]; + let resolutions = &program.files[root_id].resolutions; + + resolutions + .get(&Arc::from("private_fn")) + .expect("private_fn missing"); + + resolutions + .get(&Arc::from("public_fn")) + .expect("public_fn missing"); + } + + #[test] + fn test_pub_use_propagation() { + // Scenario: Re-exporting. + // 1. A.simf defines `pub fn foo`. + // 2. B.simf imports it and re-exports it via `pub use`. + // 3. main.simf imports it from B. + // Expected: `foo` must be present in both B's scope and main's scope. + + let (graph, ids, _dir) = setup_graph(vec![ + ("libs/lib/A.simf", "pub fn foo() {}"), + ("libs/lib/B.simf", "pub use lib::A::foo;"), + ("main.simf", "use lib::B::foo;"), + ]); + + let mut error_handler = ErrorCollector::new(); + let program_option = graph.linearize_and_build(&mut error_handler).unwrap(); + + let Some(program) = program_option else { + panic!("{}", error_handler); + }; + + let id_b = ids["B"]; + let id_root = ids["main"]; + + // Check B's scope + program.files[id_b] + .resolutions + .get(&Arc::from("foo")) + .expect("foo missing in B"); + + // Check Root's scope + program.files[id_root] + .resolutions + .get(&Arc::from("foo")) + .expect("foo missing in Root"); + } + + #[test] + fn test_private_import_encapsulation_error() { + // Scenario: Access violation. + // 1. A.simf defines `pub fn foo`. + // 2. B.simf imports it via `use` (Private import). + // 3. main.simf tries to import `foo` from B. + // Expected: Error, because B did not re-export foo. + + let (graph, _ids, _dir) = setup_graph(vec![ + ("libs/lib/A.simf", "pub fn foo() {}"), + ("libs/lib/B.simf", "use lib::A::foo;"), // <--- Private binding! 
+ ("main.simf", "use lib::B::foo;"), // <--- Should fail + ]); + + let mut error_handler = ErrorCollector::new(); + let program_option = graph.linearize_and_build(&mut error_handler).unwrap(); + + assert!( + program_option.is_none(), + "Build should fail and return None when importing a private binding" + ); + assert!(error_handler + .to_string() + .contains(&"Item `foo` is private".to_string())); + } +}