|
| 1 | +use serde::{Deserialize, Serialize}; |
| 2 | +use std::collections::HashMap; |
| 3 | +use std::fs; |
| 4 | +use std::path::Path; |
| 5 | +use std::ops::Index; |
| 6 | + |
/// Represents the extracted data of an ebuild file.
///
/// The data is a flat map of variable names to string values. Names are
/// lower-cased by [`EbuildData::insert`] and looked up case-insensitively by
/// [`EbuildData::get`] and the `Index<&str>` implementation.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct EbuildData {
    // Variable name -> value. `insert` lower-cases the name before storing it.
    variables: HashMap<String, String>,
}
| 12 | + |
| 13 | +impl EbuildData { |
| 14 | + pub fn new() -> Self { |
| 15 | + Self::default() |
| 16 | + } |
| 17 | + |
| 18 | + /// Adds a variable (name is stored in lowercase). |
| 19 | + pub fn insert(&mut self, name: String, value: String) { |
| 20 | + self.variables.insert(name.to_lowercase(), value); |
| 21 | + } |
| 22 | + |
| 23 | + /// Retrieves the value of a variable by name (case-insensitive). |
| 24 | + pub fn get(&self, name: &str) -> Option<&String> { |
| 25 | + self.variables.get(&name.to_lowercase()) |
| 26 | + } |
| 27 | + |
| 28 | + /// Returns all variables. |
| 29 | + pub fn variables(&self) -> &HashMap<String, String> { |
| 30 | + &self.variables |
| 31 | + } |
| 32 | + |
| 33 | + /// Scans an ebuild file and extracts variable assignments. |
| 34 | + pub fn scan<P: AsRef<Path>>(path: P) -> std::io::Result<Self> { |
| 35 | + let content = fs::read_to_string(path)?; |
| 36 | + Ok(Self::parse(&content)) |
| 37 | + } |
| 38 | + |
| 39 | + /// Parses the content of an ebuild file. |
| 40 | + /// It is not a bash-syntax parser, but rather a simple variable assignment extractor. |
| 41 | + pub fn parse(content: &str) -> Self { |
| 42 | + let mut data = Self::new(); |
| 43 | + let mut lines = content.lines().peekable(); |
| 44 | + |
| 45 | + while let Some(line) = lines.next() { |
| 46 | + let trimmed = line.trim(); |
| 47 | + |
| 48 | + // Ignore comments and empty lines |
| 49 | + if trimmed.is_empty() || trimmed.starts_with('#') { |
| 50 | + continue; |
| 51 | + } |
| 52 | + |
| 53 | + // Ignore shell functions: blafasel() { ... } |
| 54 | + if (trimmed.contains("()") && (trimmed.contains('{') || lines.peek().map_or(false, |l| l.trim().starts_with('{')))) || |
| 55 | + (trimmed.starts_with("function ") && (trimmed.contains('{') || lines.peek().map_or(false, |l| l.trim().starts_with('{')))) { |
| 56 | + // Simple skipping of functions (until the closing brace) |
| 57 | + let mut brace_count = 0; |
| 58 | + let mut current_line_content = trimmed.to_string(); |
| 59 | + |
| 60 | + loop { |
| 61 | + brace_count += current_line_content.chars().filter(|&c| c == '{').count(); |
| 62 | + brace_count -= current_line_content.chars().filter(|&c| c == '}').count(); |
| 63 | + |
| 64 | + if brace_count <= 0 && current_line_content.contains('}') { |
| 65 | + break; |
| 66 | + } |
| 67 | + if let Some(next) = lines.next() { |
| 68 | + current_line_content = next.trim().to_string(); |
| 69 | + } else { |
| 70 | + break; |
| 71 | + } |
| 72 | + } |
| 73 | + continue; |
| 74 | + } |
| 75 | + |
| 76 | + // Detect variable assignments: NAME=VALUE or NAME=( VALUE ) |
| 77 | + if let Some(eq_idx) = trimmed.find('=') { |
| 78 | + let name = trimmed[..eq_idx].trim(); |
| 79 | + |
| 80 | + // Validate variable name (must not contain spaces and should start with letter/underscore) |
| 81 | + if !name.chars().all(|c| c.is_alphanumeric() || c == '_') || name.is_empty() { |
| 82 | + continue; |
| 83 | + } |
| 84 | + |
| 85 | + let mut value_part = trimmed[eq_idx + 1..].trim(); |
| 86 | + |
| 87 | + // Safety check for empty value_part length when accessing chars (though trim() handles empty) |
| 88 | + if value_part.is_empty() && !lines.peek().map_or(false, |l| l.trim().starts_with('(')) { |
| 89 | + data.insert(name.to_string(), String::new()); |
| 90 | + continue; |
| 91 | + } |
| 92 | + |
| 93 | + // Remove comments at the end of the line (if not within quotes) |
| 94 | + // We search for '#' outside of quotes. |
| 95 | + if let Some(hash_idx) = value_part.find('#') { |
| 96 | + let prefix = &value_part[..hash_idx]; |
| 97 | + let quote_count = prefix.chars().filter(|&c| c == '"' || c == '\'').count(); |
| 98 | + if quote_count % 2 == 0 { |
| 99 | + value_part = prefix.trim(); |
| 100 | + } |
| 101 | + } |
| 102 | + |
| 103 | + let raw_value; |
| 104 | + |
| 105 | + if value_part.starts_with('(') || (value_part.is_empty() && lines.peek().map_or(false, |l| l.trim().starts_with('('))) { |
| 106 | + // Array assignment |
| 107 | + let mut array_content = String::new(); |
| 108 | + let mut current_part = value_part.to_string(); |
| 109 | + |
| 110 | + if current_part.is_empty() { |
| 111 | + if let Some(next) = lines.next() { |
| 112 | + current_part = next.trim().to_string(); |
| 113 | + } else { |
| 114 | + break; |
| 115 | + } |
| 116 | + } |
| 117 | + |
| 118 | + if current_part.contains(')') { |
| 119 | + let start_idx = current_part.find('(').unwrap_or(0); |
| 120 | + if let Some(end_idx) = current_part.rfind(')') { |
| 121 | + if current_part.contains('(') { |
| 122 | + array_content.push_str(¤t_part[start_idx + 1..end_idx]); |
| 123 | + } else { |
| 124 | + array_content.push_str(¤t_part[..end_idx]); |
| 125 | + } |
| 126 | + } |
| 127 | + } else { |
| 128 | + if current_part.starts_with('(') { |
| 129 | + array_content.push_str(¤t_part[1..]); |
| 130 | + } else { |
| 131 | + array_content.push_str(¤t_part); |
| 132 | + } |
| 133 | + |
| 134 | + while let Some(next_line) = lines.next() { |
| 135 | + let next_trimmed = next_line.trim(); |
| 136 | + if let Some(end_idx) = next_trimmed.find(')') { |
| 137 | + array_content.push(' '); |
| 138 | + array_content.push_str(&next_trimmed[..end_idx]); |
| 139 | + break; |
| 140 | + } else { |
| 141 | + array_content.push(' '); |
| 142 | + array_content.push_str(next_trimmed); |
| 143 | + } |
| 144 | + } |
| 145 | + } |
| 146 | + let raw_val = array_content.replace('\t', " ").trim().to_string(); |
| 147 | + raw_value = raw_val.split_whitespace().collect::<Vec<_>>().join(" "); |
| 148 | + } else if !value_part.is_empty() && ((value_part.starts_with('"') && !value_part[1..].contains('"')) || (value_part.starts_with('\'') && !value_part[1..].contains('\''))) { |
| 149 | + // Multi-line assignment with quotes |
| 150 | + let quote = value_part.chars().next().unwrap(); |
| 151 | + let mut quoted_content = value_part[1..].to_string(); |
| 152 | + |
| 153 | + while let Some(next_line) = lines.next() { |
| 154 | + quoted_content.push(' '); |
| 155 | + let next_trimmed = next_line.trim(); |
| 156 | + if let Some(end_idx) = next_trimmed.find(quote) { |
| 157 | + quoted_content.push_str(&next_trimmed[..end_idx]); |
| 158 | + break; |
| 159 | + } else { |
| 160 | + quoted_content.push_str(next_trimmed); |
| 161 | + } |
| 162 | + } |
| 163 | + let raw_val = quoted_content.replace('\t', " ").trim().to_string(); |
| 164 | + raw_value = raw_val.split_whitespace().collect::<Vec<_>>().join(" "); |
| 165 | + } else { |
| 166 | + // Simple assignment |
| 167 | + if value_part.len() >= 2 && ((value_part.starts_with('"') && value_part.ends_with('"')) || |
| 168 | + (value_part.starts_with('\'') && value_part.ends_with('\''))) { |
| 169 | + raw_value = value_part[1..value_part.len() - 1].to_string(); |
| 170 | + } else { |
| 171 | + raw_value = value_part.to_string(); |
| 172 | + } |
| 173 | + } |
| 174 | + |
| 175 | + // Immediate resolution of self-references to support extensions |
| 176 | + let mut final_value = raw_value; |
| 177 | + if final_value.contains(&format!("${{{}}}", name.to_uppercase())) || final_value.contains(&format!("${}", name.to_uppercase())) { |
| 178 | + if let Some(old_val) = data.get(name) { |
| 179 | + final_value = final_value.replace(&format!("${{{}}}", name.to_uppercase()), old_val); |
| 180 | + final_value = final_value.replace(&format!("${}", name.to_uppercase()), old_val); |
| 181 | + } |
| 182 | + } |
| 183 | + if final_value.contains(&format!("${{{}}}", name.to_lowercase())) || final_value.contains(&format!("${}", name.to_lowercase())) { |
| 184 | + if let Some(old_val) = data.get(name) { |
| 185 | + final_value = final_value.replace(&format!("${{{}}}", name.to_lowercase()), old_val); |
| 186 | + final_value = final_value.replace(&format!("${}", name.to_lowercase()), old_val); |
| 187 | + } |
| 188 | + } |
| 189 | + |
| 190 | + data.insert(name.to_string(), final_value); |
| 191 | + continue; |
| 192 | + } |
| 193 | + } |
| 194 | + |
| 195 | + // Two-step process for resolving variable references |
| 196 | + data.resolve_variables(); |
| 197 | + |
| 198 | + data |
| 199 | + } |
| 200 | + |
| 201 | + pub fn resolve_variables(&mut self) { |
| 202 | + let keys: Vec<String> = self.variables.keys().cloned().collect(); |
| 203 | + |
| 204 | + // We do this in two passes to resolve simple dependencies |
| 205 | + for _ in 0..2 { |
| 206 | + let mut updates = Vec::new(); |
| 207 | + for key in &keys { |
| 208 | + if let Some(value) = self.variables.get(key) { |
| 209 | + if value.contains('$') { |
| 210 | + let mut new_value = value.clone(); |
| 211 | + let mut changed = false; |
| 212 | + |
| 213 | + for (vname, vval) in &self.variables { |
| 214 | + // Look for ${VAR} or $VAR |
| 215 | + let patterns = vec![format!("${{{}}}", vname.to_uppercase()), format!("${}", vname.to_uppercase())]; |
| 216 | + for pattern in patterns { |
| 217 | + if new_value.contains(&pattern) { |
| 218 | + new_value = new_value.replace(&pattern, vval); |
| 219 | + changed = true; |
| 220 | + } |
| 221 | + } |
| 222 | + |
| 223 | + // Also support lowercase if needed, ebuilds mostly use uppercase |
| 224 | + let patterns_lc = vec![format!("${{{}}}", vname.to_lowercase()), format!("${}", vname.to_lowercase())]; |
| 225 | + for pattern in patterns_lc { |
| 226 | + if new_value.contains(&pattern) { |
| 227 | + new_value = new_value.replace(&pattern, vval); |
| 228 | + changed = true; |
| 229 | + } |
| 230 | + } |
| 231 | + } |
| 232 | + |
| 233 | + if changed { |
| 234 | + updates.push((key.clone(), new_value)); |
| 235 | + } |
| 236 | + } |
| 237 | + } |
| 238 | + } |
| 239 | + |
| 240 | + for (key, val) in updates { |
| 241 | + self.variables.insert(key, val); |
| 242 | + } |
| 243 | + } |
| 244 | + } |
| 245 | +} |
| 246 | + |
| 247 | +impl Index<&str> for EbuildData { |
| 248 | + type Output = String; |
| 249 | + |
| 250 | + fn index(&self, index: &str) -> &Self::Output { |
| 251 | + self.variables.get(&index.to_lowercase()).unwrap_or(&EMPTY_STRING) |
| 252 | + } |
| 253 | +} |
| 254 | + |
/// Empty string with `'static` lifetime, returned by reference from
/// `Index<&str>` for `EbuildData` when the requested variable does not exist.
static EMPTY_STRING: String = String::new();
| 256 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Unquoted and double-quoted scalar assignments end up under lower-cased names.
    #[test]
    fn test_parse_simple_assignment() {
        let parsed = EbuildData::parse("EAPI=8\nKEYWORDS=\"~amd64 x86\"");

        assert_eq!(parsed["eapi"], "8");
        assert_eq!(parsed["keywords"], "~amd64 x86");
    }

    /// A single-line array is flattened to its space-separated elements.
    #[test]
    fn test_parse_array_assignment() {
        let parsed = EbuildData::parse("IUSE=( foo bar )");

        assert_eq!(parsed["iuse"], "foo bar");
    }

    /// Assignments on either side of a function body are kept; the body is skipped.
    #[test]
    fn test_ignore_functions() {
        let source = "VAR1=val1\nsrc_compile() {\n emake\n}\nVAR2=val2";
        let parsed = EbuildData::parse(source);

        assert_eq!(parsed["var1"], "val1");
        assert_eq!(parsed["var2"], "val2");
    }

    /// A `${NAME}` reference to an earlier variable is replaced by that variable's value.
    #[test]
    fn test_resolve_variables() {
        let source = "RDEPEND=\"dev-libs/libxml2\"\nDEPEND=\"${RDEPEND}\"";
        let parsed = EbuildData::parse(source);

        assert_eq!(parsed["rdepend"], "dev-libs/libxml2");
        assert_eq!(parsed["depend"], "dev-libs/libxml2");
    }

    /// Truncated or unbalanced input must be tolerated without panicking.
    #[test]
    fn test_parse_malformed_ebuild() {
        let malformed = [
            "VAR=(\n",
            "VAR=\"\n",
            "VAR='",
            "VAR=",
            "VAR=()",
            "function test() {",
        ];

        for input in malformed.iter() {
            let _ = EbuildData::parse(input);
        }
    }

    /// Runs the scanner over the example ebuilds; the paths are relative, so the
    /// test expects `testdata/ebuild/` to be reachable from the working directory.
    #[test]
    fn test_scan_all_example_files() {
        // 1. nginx-1.29.3.ebuild
        let nginx = EbuildData::scan("testdata/ebuild/nginx-1.29.3.ebuild").unwrap();
        assert_eq!(nginx["eapi"], "8");
        assert!(nginx["keywords"].contains("~amd64"));
        assert!(nginx["nginx_subsystems"].contains("+http"));
        assert_eq!(nginx["nginx_update_stream"], "mainline");
        assert_eq!(nginx["nginx_tests_commit"], "06a36245e134eac985cdfc5fac982cb149f61412");
        assert!(nginx["nginx_misc_files"].contains("nginx-{r2.logrotate"));

        // 2. perl-5.40.2.ebuild
        let perl = EbuildData::scan("testdata/ebuild/perl-5.40.2.ebuild").unwrap();
        assert_eq!(perl["eapi"], "8");
        assert_eq!(perl["dist_author"], "SHAY");
        assert_eq!(perl["license"], "|| ( Artistic GPL-1+ )");
        assert_eq!(perl["homepage"], "https://www.perl.org/");

        // 3. php-8.4.14.ebuild
        let php = EbuildData::scan("testdata/ebuild/php-8.4.14.ebuild").unwrap();
        assert_eq!(php["eapi"], "8");
        assert_eq!(php["sapis"], "embed cli cgi fpm apache2 phpdbg");
        assert!(php["iuse"].contains("bcmath"));
        // "threads" comes from the first IUSE assignment in the file.
        assert!(php["iuse"].contains("threads"));
        assert_eq!(php["description"], "The PHP language runtime engine");
        assert!(php["license"].contains("PHP-3.01"));
        assert!(php["license"].contains("Zend-2.0"));
        assert!(php["keywords"].contains("~amd64"));
        assert!(php["common_depend"].contains("dev-libs/libpcre2"));
        assert!(php["common_depend"].contains("app-crypt/argon2:="));

        // 4. postfix-3.10.4.ebuild
        let postfix = EbuildData::scan("testdata/ebuild/postfix-3.10.4.ebuild").unwrap();
        assert_eq!(postfix["eapi"], "8");
        assert_eq!(postfix["description"], "A fast and secure drop-in replacement for sendmail");
        assert_eq!(postfix["homepage"], "https://www.postfix.org/");
        assert_eq!(postfix["license"], "|| ( IBM EPL-2.0 )");
        assert_eq!(postfix["slot"], "0");
        assert!(postfix["keywords"].contains("amd64"));
        assert!(postfix["iuse"].contains("+berkdb"));
        assert!(postfix["iuse"].contains("ldap-bind"));
        assert!(postfix["depend"].contains("acct-group/postfix"));
        assert!(postfix["depend"].contains("ssl? ( >=dev-libs/openssl-1.1.1:0= )"));

        // 5. rust-bin-1.89.0.ebuild
        let rust_bin = EbuildData::scan("testdata/ebuild/rust-bin-1.89.0.ebuild").unwrap();
        assert_eq!(rust_bin["eapi"], "8");
        assert_eq!(rust_bin["description"], "Systems programming language from Mozilla");
        assert_eq!(rust_bin["llvm_optional"], "yes");
        assert_eq!(rust_bin["homepage"], "https://www.rust-lang.org/");
        assert_eq!(rust_bin["license"], "|| ( MIT Apache-2.0 ) BSD BSD-1 BSD-2 BSD-4");
        assert!(rust_bin["keywords"].contains("amd64"));
        assert!(rust_bin["iuse"].contains("rust-analyzer"));
        assert!(rust_bin["rdepend"].contains("net-misc/curl"));
        // `${PV%%_*}` is a parameter expansion, not a plain variable reference, so it
        // is expected to appear verbatim in the extracted QA_PREBUILT value.
        assert!(rust_bin["qa_prebuilt"].contains("opt/rust-bin-${PV%%_*}/bin/.*"));
    }
}
0 commit comments