Skip to content

Commit 5595eb6

Browse files
committed
added a simple .ebuild reader/parser.
1 parent 3330a08 commit 5595eb6

7 files changed

Lines changed: 2753 additions & 2 deletions

File tree

src/ebuild.rs

Lines changed: 360 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,360 @@
1+
use serde::{Deserialize, Serialize};
2+
use std::collections::HashMap;
3+
use std::fs;
4+
use std::path::Path;
5+
use std::ops::Index;
6+
7+
/// Represents the extracted data of an ebuild file.
8+
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
9+
pub struct EbuildData {
10+
variables: HashMap<String, String>,
11+
}
12+
13+
impl EbuildData {
14+
pub fn new() -> Self {
15+
Self::default()
16+
}
17+
18+
/// Adds a variable (name is stored in lowercase).
19+
pub fn insert(&mut self, name: String, value: String) {
20+
self.variables.insert(name.to_lowercase(), value);
21+
}
22+
23+
/// Retrieves the value of a variable by name (case-insensitive).
24+
pub fn get(&self, name: &str) -> Option<&String> {
25+
self.variables.get(&name.to_lowercase())
26+
}
27+
28+
/// Returns all variables.
29+
pub fn variables(&self) -> &HashMap<String, String> {
30+
&self.variables
31+
}
32+
33+
/// Scans an ebuild file and extracts variable assignments.
34+
pub fn scan<P: AsRef<Path>>(path: P) -> std::io::Result<Self> {
35+
let content = fs::read_to_string(path)?;
36+
Ok(Self::parse(&content))
37+
}
38+
39+
/// Parses the content of an ebuild file.
40+
/// It is not a bash-syntax parser, but rather a simple variable assignment extractor.
41+
pub fn parse(content: &str) -> Self {
42+
let mut data = Self::new();
43+
let mut lines = content.lines().peekable();
44+
45+
while let Some(line) = lines.next() {
46+
let trimmed = line.trim();
47+
48+
// Ignore comments and empty lines
49+
if trimmed.is_empty() || trimmed.starts_with('#') {
50+
continue;
51+
}
52+
53+
// Ignore shell functions: blafasel() { ... }
54+
if (trimmed.contains("()") && (trimmed.contains('{') || lines.peek().map_or(false, |l| l.trim().starts_with('{')))) ||
55+
(trimmed.starts_with("function ") && (trimmed.contains('{') || lines.peek().map_or(false, |l| l.trim().starts_with('{')))) {
56+
// Simple skipping of functions (until the closing brace)
57+
let mut brace_count = 0;
58+
let mut current_line_content = trimmed.to_string();
59+
60+
loop {
61+
brace_count += current_line_content.chars().filter(|&c| c == '{').count();
62+
brace_count -= current_line_content.chars().filter(|&c| c == '}').count();
63+
64+
if brace_count <= 0 && current_line_content.contains('}') {
65+
break;
66+
}
67+
if let Some(next) = lines.next() {
68+
current_line_content = next.trim().to_string();
69+
} else {
70+
break;
71+
}
72+
}
73+
continue;
74+
}
75+
76+
// Detect variable assignments: NAME=VALUE or NAME=( VALUE )
77+
if let Some(eq_idx) = trimmed.find('=') {
78+
let name = trimmed[..eq_idx].trim();
79+
80+
// Validate variable name (must not contain spaces and should start with letter/underscore)
81+
if !name.chars().all(|c| c.is_alphanumeric() || c == '_') || name.is_empty() {
82+
continue;
83+
}
84+
85+
let mut value_part = trimmed[eq_idx + 1..].trim();
86+
87+
// Safety check for empty value_part length when accessing chars (though trim() handles empty)
88+
if value_part.is_empty() && !lines.peek().map_or(false, |l| l.trim().starts_with('(')) {
89+
data.insert(name.to_string(), String::new());
90+
continue;
91+
}
92+
93+
// Remove comments at the end of the line (if not within quotes)
94+
// We search for '#' outside of quotes.
95+
if let Some(hash_idx) = value_part.find('#') {
96+
let prefix = &value_part[..hash_idx];
97+
let quote_count = prefix.chars().filter(|&c| c == '"' || c == '\'').count();
98+
if quote_count % 2 == 0 {
99+
value_part = prefix.trim();
100+
}
101+
}
102+
103+
let raw_value;
104+
105+
if value_part.starts_with('(') || (value_part.is_empty() && lines.peek().map_or(false, |l| l.trim().starts_with('('))) {
106+
// Array assignment
107+
let mut array_content = String::new();
108+
let mut current_part = value_part.to_string();
109+
110+
if current_part.is_empty() {
111+
if let Some(next) = lines.next() {
112+
current_part = next.trim().to_string();
113+
} else {
114+
break;
115+
}
116+
}
117+
118+
if current_part.contains(')') {
119+
let start_idx = current_part.find('(').unwrap_or(0);
120+
if let Some(end_idx) = current_part.rfind(')') {
121+
if current_part.contains('(') {
122+
array_content.push_str(&current_part[start_idx + 1..end_idx]);
123+
} else {
124+
array_content.push_str(&current_part[..end_idx]);
125+
}
126+
}
127+
} else {
128+
if current_part.starts_with('(') {
129+
array_content.push_str(&current_part[1..]);
130+
} else {
131+
array_content.push_str(&current_part);
132+
}
133+
134+
while let Some(next_line) = lines.next() {
135+
let next_trimmed = next_line.trim();
136+
if let Some(end_idx) = next_trimmed.find(')') {
137+
array_content.push(' ');
138+
array_content.push_str(&next_trimmed[..end_idx]);
139+
break;
140+
} else {
141+
array_content.push(' ');
142+
array_content.push_str(next_trimmed);
143+
}
144+
}
145+
}
146+
let raw_val = array_content.replace('\t', " ").trim().to_string();
147+
raw_value = raw_val.split_whitespace().collect::<Vec<_>>().join(" ");
148+
} else if !value_part.is_empty() && ((value_part.starts_with('"') && !value_part[1..].contains('"')) || (value_part.starts_with('\'') && !value_part[1..].contains('\''))) {
149+
// Multi-line assignment with quotes
150+
let quote = value_part.chars().next().unwrap();
151+
let mut quoted_content = value_part[1..].to_string();
152+
153+
while let Some(next_line) = lines.next() {
154+
quoted_content.push(' ');
155+
let next_trimmed = next_line.trim();
156+
if let Some(end_idx) = next_trimmed.find(quote) {
157+
quoted_content.push_str(&next_trimmed[..end_idx]);
158+
break;
159+
} else {
160+
quoted_content.push_str(next_trimmed);
161+
}
162+
}
163+
let raw_val = quoted_content.replace('\t', " ").trim().to_string();
164+
raw_value = raw_val.split_whitespace().collect::<Vec<_>>().join(" ");
165+
} else {
166+
// Simple assignment
167+
if value_part.len() >= 2 && ((value_part.starts_with('"') && value_part.ends_with('"')) ||
168+
(value_part.starts_with('\'') && value_part.ends_with('\''))) {
169+
raw_value = value_part[1..value_part.len() - 1].to_string();
170+
} else {
171+
raw_value = value_part.to_string();
172+
}
173+
}
174+
175+
// Immediate resolution of self-references to support extensions
176+
let mut final_value = raw_value;
177+
if final_value.contains(&format!("${{{}}}", name.to_uppercase())) || final_value.contains(&format!("${}", name.to_uppercase())) {
178+
if let Some(old_val) = data.get(name) {
179+
final_value = final_value.replace(&format!("${{{}}}", name.to_uppercase()), old_val);
180+
final_value = final_value.replace(&format!("${}", name.to_uppercase()), old_val);
181+
}
182+
}
183+
if final_value.contains(&format!("${{{}}}", name.to_lowercase())) || final_value.contains(&format!("${}", name.to_lowercase())) {
184+
if let Some(old_val) = data.get(name) {
185+
final_value = final_value.replace(&format!("${{{}}}", name.to_lowercase()), old_val);
186+
final_value = final_value.replace(&format!("${}", name.to_lowercase()), old_val);
187+
}
188+
}
189+
190+
data.insert(name.to_string(), final_value);
191+
continue;
192+
}
193+
}
194+
195+
// Two-step process for resolving variable references
196+
data.resolve_variables();
197+
198+
data
199+
}
200+
201+
pub fn resolve_variables(&mut self) {
202+
let keys: Vec<String> = self.variables.keys().cloned().collect();
203+
204+
// We do this in two passes to resolve simple dependencies
205+
for _ in 0..2 {
206+
let mut updates = Vec::new();
207+
for key in &keys {
208+
if let Some(value) = self.variables.get(key) {
209+
if value.contains('$') {
210+
let mut new_value = value.clone();
211+
let mut changed = false;
212+
213+
for (vname, vval) in &self.variables {
214+
// Look for ${VAR} or $VAR
215+
let patterns = vec![format!("${{{}}}", vname.to_uppercase()), format!("${}", vname.to_uppercase())];
216+
for pattern in patterns {
217+
if new_value.contains(&pattern) {
218+
new_value = new_value.replace(&pattern, vval);
219+
changed = true;
220+
}
221+
}
222+
223+
// Also support lowercase if needed, ebuilds mostly use uppercase
224+
let patterns_lc = vec![format!("${{{}}}", vname.to_lowercase()), format!("${}", vname.to_lowercase())];
225+
for pattern in patterns_lc {
226+
if new_value.contains(&pattern) {
227+
new_value = new_value.replace(&pattern, vval);
228+
changed = true;
229+
}
230+
}
231+
}
232+
233+
if changed {
234+
updates.push((key.clone(), new_value));
235+
}
236+
}
237+
}
238+
}
239+
240+
for (key, val) in updates {
241+
self.variables.insert(key, val);
242+
}
243+
}
244+
}
245+
}
246+
247+
impl Index<&str> for EbuildData {
248+
type Output = String;
249+
250+
fn index(&self, index: &str) -> &Self::Output {
251+
self.variables.get(&index.to_lowercase()).unwrap_or(&EMPTY_STRING)
252+
}
253+
}
254+
255+
static EMPTY_STRING: String = String::new();
256+
257+
#[cfg(test)]
mod tests {
    use super::*;

    /// Loads one of the example ebuilds, failing the test if it cannot be read.
    fn load(path: &str) -> EbuildData {
        EbuildData::scan(path).unwrap()
    }

    /// Asserts that `haystack` contains every entry of `needles`.
    fn assert_contains_all(haystack: &str, needles: &[&str]) {
        for needle in needles {
            assert!(
                haystack.contains(*needle),
                "{:?} not found in {:?}",
                needle,
                haystack
            );
        }
    }

    #[test]
    fn test_parse_simple_assignment() {
        let parsed = EbuildData::parse("EAPI=8\nKEYWORDS=\"~amd64 x86\"");

        assert_eq!(parsed["eapi"], "8");
        assert_eq!(parsed["keywords"], "~amd64 x86");
    }

    #[test]
    fn test_parse_array_assignment() {
        let parsed = EbuildData::parse("IUSE=( foo bar )");

        assert_eq!(parsed["iuse"], "foo bar");
    }

    #[test]
    fn test_ignore_functions() {
        let parsed = EbuildData::parse("VAR1=val1\nsrc_compile() {\n emake\n}\nVAR2=val2");

        assert_eq!(parsed["var1"], "val1");
        assert_eq!(parsed["var2"], "val2");
    }

    #[test]
    fn test_resolve_variables() {
        let parsed = EbuildData::parse("RDEPEND=\"dev-libs/libxml2\"\nDEPEND=\"${RDEPEND}\"");

        assert_eq!(parsed["rdepend"], "dev-libs/libxml2");
        assert_eq!(parsed["depend"], "dev-libs/libxml2");
    }

    #[test]
    fn test_parse_malformed_ebuild() {
        // None of these truncated inputs may panic.
        let malformed = [
            "VAR=(\n",
            "VAR=\"\n",
            "VAR='",
            "VAR=",
            "VAR=()",
            "function test() {",
        ];

        for input in malformed.iter() {
            let _ = EbuildData::parse(input);
        }
    }

    #[test]
    fn test_scan_all_example_files() {
        // 1. nginx-1.29.3.ebuild
        {
            let nginx = load("testdata/ebuild/nginx-1.29.3.ebuild");
            assert_eq!(nginx["eapi"], "8");
            assert_contains_all(&nginx["keywords"], &["~amd64"]);
            assert_contains_all(&nginx["nginx_subsystems"], &["+http"]);
            assert_eq!(nginx["nginx_update_stream"], "mainline");
            assert_eq!(
                nginx["nginx_tests_commit"],
                "06a36245e134eac985cdfc5fac982cb149f61412"
            );
            assert_contains_all(&nginx["nginx_misc_files"], &["nginx-{r2.logrotate"]);
        }

        // 2. perl-5.40.2.ebuild
        {
            let perl = load("testdata/ebuild/perl-5.40.2.ebuild");
            assert_eq!(perl["eapi"], "8");
            assert_eq!(perl["dist_author"], "SHAY");
            assert_eq!(perl["license"], "|| ( Artistic GPL-1+ )");
            assert_eq!(perl["homepage"], "https://www.perl.org/");
        }

        // 3. php-8.4.14.ebuild
        {
            let php = load("testdata/ebuild/php-8.4.14.ebuild");
            assert_eq!(php["eapi"], "8");
            assert_eq!(php["sapis"], "embed cli cgi fpm apache2 phpdbg");
            // "threads" comes from the first IUSE assignment.
            assert_contains_all(&php["iuse"], &["bcmath", "threads"]);
            assert_eq!(php["description"], "The PHP language runtime engine");
            assert_contains_all(&php["license"], &["PHP-3.01", "Zend-2.0"]);
            assert_contains_all(&php["keywords"], &["~amd64"]);
            assert_contains_all(
                &php["common_depend"],
                &["dev-libs/libpcre2", "app-crypt/argon2:="],
            );
        }

        // 4. postfix-3.10.4.ebuild
        {
            let postfix = load("testdata/ebuild/postfix-3.10.4.ebuild");
            assert_eq!(postfix["eapi"], "8");
            assert_eq!(
                postfix["description"],
                "A fast and secure drop-in replacement for sendmail"
            );
            assert_eq!(postfix["homepage"], "https://www.postfix.org/");
            assert_eq!(postfix["license"], "|| ( IBM EPL-2.0 )");
            assert_eq!(postfix["slot"], "0");
            assert_contains_all(&postfix["keywords"], &["amd64"]);
            assert_contains_all(&postfix["iuse"], &["+berkdb", "ldap-bind"]);
            assert_contains_all(
                &postfix["depend"],
                &["acct-group/postfix", "ssl? ( >=dev-libs/openssl-1.1.1:0= )"],
            );
        }

        // 5. rust-bin-1.89.0.ebuild
        {
            let rust_bin = load("testdata/ebuild/rust-bin-1.89.0.ebuild");
            assert_eq!(rust_bin["eapi"], "8");
            assert_eq!(
                rust_bin["description"],
                "Systems programming language from Mozilla"
            );
            assert_eq!(rust_bin["llvm_optional"], "yes");
            assert_eq!(rust_bin["homepage"], "https://www.rust-lang.org/");
            assert_eq!(
                rust_bin["license"],
                "|| ( MIT Apache-2.0 ) BSD BSD-1 BSD-2 BSD-4"
            );
            assert_contains_all(&rust_bin["keywords"], &["amd64"]);
            assert_contains_all(&rust_bin["iuse"], &["rust-analyzer"]);
            assert_contains_all(&rust_bin["rdepend"], &["net-misc/curl"]);
            // SLOT="${PV%%_*}" resolves to "${PV%%_*}"
            assert_contains_all(
                &rust_bin["qa_prebuilt"],
                &["opt/rust-bin-${PV%%_*}/bin/.*"],
            );
        }
    }
}

0 commit comments

Comments
 (0)