Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
542 changes: 493 additions & 49 deletions Cargo.lock

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@ path = "src/main.rs"

[dependencies]
anyhow = "1.0"
clap = { version = "4.5", features = ["derive"] }
clap = { version = "4.6", features = ["derive"] }
crc32fast = "1.5"
nohash-hasher = "0.2.0"
quick-xml = "0.39.2"
smallvec = "1.15"
sha1_smol = { version = "1.0", features = ["std"] }
flate2 = "1"
lzma-rs = "0.3"
zip = { version = "8", default-features = false, features = ["deflate"] }
zip = { version = "8", default-features = false, features = ["aes-crypto", "deflate-flate2"] }
sha1 = "0.10"
zstd = "0.13"

17 changes: 15 additions & 2 deletions src/gex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,11 @@ class GeneratedTask(BaseTask):
));
out.push_str(" contents = f.read()\n");
}
CandidateSource::Zip { archive, member } => {
CandidateSource::Zip {
archive,
member,
password,
} => {
let archive_name = archive
.file_name()
.map(|s| s.to_string_lossy())
Expand All @@ -114,7 +118,16 @@ class GeneratedTask(BaseTask):
out.push_str(&format!(
" with zipfile.ZipFile(os.path.join(in_dir, {py_archive})) as z:\n"
));
out.push_str(&format!(" with z.open({py_member}) as f:\n"));
if let Some(pw) = password {
let py_pw = py_str(pw);
// NOTE: zipfile only supports ZipCrypto; AES-encrypted
// archives need pyzipper or another AES-capable library.
out.push_str(&format!(
" with z.open({py_member}, pwd={py_pw}.encode()) as f:\n"
));
} else {
out.push_str(&format!(" with z.open({py_member}) as f:\n"));
Comment thread
pathawks marked this conversation as resolved.
}
Comment thread
pathawks marked this conversation as resolved.
out.push_str(" contents = f.read()\n");
}
CandidateSource::Kpka { archive, index } => {
Expand Down
6 changes: 3 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pub fn apply_found<F>(
where
F: FnMut(&RomInfo, &[u8]) -> anyhow::Result<()>,
{
use sha1_smol::Sha1;
use sha1::{Digest, Sha1};
use types::MatchedData;

let MatchedData::Spec(ref spec) = found.data;
Expand All @@ -53,12 +53,12 @@ where
let mut hasher = Sha1::new();
hasher.update(roms[rid].header.as_ref().unwrap());
hasher.update(&bytes_owned);
hasher.digest().to_string()
format!("{:x}", hasher.finalize())
})
.as_str()
} else {
if sha1_cache.is_none() {
sha1_cache = Some(Sha1::from(&bytes_owned[..]).digest().to_string());
sha1_cache = Some(format!("{:x}", Sha1::digest(&bytes_owned[..])));
}
sha1_cache.as_ref().unwrap().as_str()
};
Expand Down
9 changes: 7 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,18 @@ struct Opt {
#[arg(long)]
no_expand: bool,

/// Passwords to try on encrypted zip members (may be repeated)
#[arg(long)]
password: Vec<String>,

/// Verbose logging (-v)
#[arg(short, long)]
verbose: bool,
}

fn run_extract(opt: &Opt, spec: chisel::types::ExtractionSpec) -> anyhow::Result<()> {
std::fs::create_dir_all(&opt.output_dir)?;
let cands = utils::load_candidates_from_paths(&opt.input_files, opt.verbose)?;
let cands = utils::load_candidates_from_paths(&opt.input_files, &opt.password, opt.verbose)?;
for cand in &cands {
let mut s = spec.clone();
if s.size == 0 {
Expand Down Expand Up @@ -110,7 +114,8 @@ fn main() -> anyhow::Result<()> {
.as_ref()
.ok_or_else(|| anyhow::anyhow!("either --dat or --spec is required"))?;
let mut roms = load_rom_list(dat, opt.game.as_deref())?;
let mut cands = utils::load_candidates_from_paths(&opt.input_files, opt.verbose)?;
let mut cands =
utils::load_candidates_from_paths(&opt.input_files, &opt.password, opt.verbose)?;

if opt.gex.is_none() {
std::fs::create_dir_all(&opt.output_dir)?;
Expand Down
1 change: 1 addition & 0 deletions src/pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ fn cost_ratio(work: u64, value: u64) -> u64 {
((work as f64) / (value.max(1) as f64)).ceil() as u64
}

#[allow(clippy::too_many_arguments)]
pub fn run_pipeline(
roms: &mut [RomInfo],
cands: &mut [Candidate],
Expand Down
6 changes: 3 additions & 3 deletions src/test_support.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::types::{Candidate, CandidateSource, Found, Heuristic, MatchRecord, Pending, RomInfo};
use crc32fast;
use sha1_smol::Sha1;
use sha1::{Digest, Sha1};
use std::collections::HashMap;

pub fn make_rom(name: &str, data: &[u8]) -> RomInfo {
Expand All @@ -9,7 +9,7 @@ pub fn make_rom(name: &str, data: &[u8]) -> RomInfo {
game: String::new(),
size: data.len(),
crc32: crc32fast::hash(data),
sha1: Some(Sha1::from(data).digest().to_string()),
sha1: Some(format!("{:x}", Sha1::digest(data))),
matched: false,
unverified: false,
region: None,
Expand All @@ -25,7 +25,7 @@ pub fn make_rom_with_header(name: &str, header: &[u8], content: &[u8]) -> RomInf
full.extend_from_slice(header);
full.extend_from_slice(content);
let full_crc = crc32fast::hash(&full);
let full_sha1 = Sha1::from(&full).digest().to_string();
let full_sha1 = format!("{:x}", Sha1::digest(&full));
let content_crc = crate::utils::derive_content_crc(full_crc, header, content.len());
RomInfo {
name: name.to_string(),
Expand Down
10 changes: 8 additions & 2 deletions src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,11 @@ pub enum CandidateSource {
/// Single file decompressed from a gzip stream.
Gzip { archive: PathBuf },
/// One member extracted from a zip archive.
Zip { archive: PathBuf, member: String },
Zip {
archive: PathBuf,
member: String,
password: Option<String>,
},
Comment thread
pathawks marked this conversation as resolved.
/// Decompressed from an LZMA/XZ block found at `offset` inside `parent`.
Lzma { parent: PathBuf, offset: usize },
/// One entry extracted from a KPKA/PAK archive (index is entry ordinal, 0-based).
Expand Down Expand Up @@ -268,7 +272,9 @@ impl std::fmt::Display for Candidate {
.unwrap_or("???".into());
write!(f, "[gzip in {}]", a)?;
}
CandidateSource::Zip { archive, member } => {
CandidateSource::Zip {
archive, member, ..
} => {
let a = archive
.file_name()
.map(|s| s.to_string_lossy())
Expand Down
152 changes: 115 additions & 37 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ const LZMA1_VALID_PROPS: [bool; 256] = {
/// Check if a dict_size is plausible for real LZMA1 streams.
/// Real dict sizes are powers of 2 or 2^n + 2^(n-1) (i.e. 3 * 2^(n-1)).
fn is_valid_lzma_dict_size(d: u32) -> bool {
d.is_power_of_two() || (d % 3 == 0 && d / 3 > 0 && (d / 3).is_power_of_two())
d.is_power_of_two() || (d.is_multiple_of(3) && d / 3 > 0 && (d / 3).is_power_of_two())
}

use crate::{Candidate, RomInfo};
Expand Down Expand Up @@ -209,9 +209,15 @@ pub fn load_rom_list(dat_path: &Path, maybe_game: Option<&str>) -> anyhow::Resul
/// Detect compressed archives by magic bytes and expand into `Candidate`s.
///
/// - `.gz` / gzip magic (`1f 8b`): decompress into one candidate.
/// - `.zip` magic (`PK\x03\x04`): one candidate per unencrypted file member.
/// - `.zip` magic (`PK\x03\x04`): one candidate per file member. Encrypted members
/// are decrypted by trying each of the provided `passwords` in order; a member that
/// no password decrypts is always skipped (a warning is printed only when `verbose` is set).
/// - Everything else: one plain candidate.
pub fn load_candidates_from_paths<I>(paths: I, verbose: bool) -> anyhow::Result<Vec<Candidate>>
pub fn load_candidates_from_paths<I>(
paths: I,
passwords: &[String],
verbose: bool,
) -> anyhow::Result<Vec<Candidate>>
Comment thread
pathawks marked this conversation as resolved.
Comment thread
pathawks marked this conversation as resolved.
where
I: IntoIterator,
I::Item: AsRef<Path>,
Expand Down Expand Up @@ -257,45 +263,117 @@ where
let cursor = std::io::Cursor::new(&data);
let mut archive = zip::ZipArchive::new(cursor)
.with_context(|| format!("Opening zip {}", path.display()))?;
for i in 0..archive.len() {
let mut entry = archive
.by_index(i)
.with_context(|| format!("Reading zip entry {} in {}", i, path.display()))?;
if entry.is_dir() {
continue;
}
if entry.encrypted() {

// Collect metadata first to avoid borrow conflicts when retrying
// passwords on encrypted entries.
let entry_meta: Vec<_> = (0..archive.len())
.map(|i| {
let entry = archive.by_index_raw(i).with_context(|| {
format!("Reading zip entry metadata {} in {}", i, path.display())
})?;
if entry.is_dir() {
return Ok(None);
}
Ok(Some((i, entry.name().to_string(), entry.encrypted())))
})
.collect::<anyhow::Result<Vec<_>>>()?
.into_iter()
.flatten()
.collect();

for (i, member_name, encrypted) in entry_meta {
if encrypted {
let mut decrypted = None;
for pw in passwords {
match archive.by_index_decrypt(i, pw.as_bytes()) {
Comment thread
pathawks marked this conversation as resolved.
Err(zip::result::ZipError::InvalidPassword) => continue,
Err(e) => {
return Err(anyhow::Error::from(e).context(format!(
"Reading encrypted zip entry '{}' in {}",
member_name,
path.display()
)));
}
Ok(mut entry) => {
let mut buf = Vec::new();
match entry.read_to_end(&mut buf) {
Ok(_) => {
decrypted = Some((buf, pw.clone()));
break;
}
// AES HMAC validation failure at end-of-stream
Err(e)
if e.kind() == std::io::ErrorKind::InvalidData
|| e.kind() == std::io::ErrorKind::InvalidInput =>
{
continue;
}
Err(e) => {
return Err(anyhow::Error::from(e).context(format!(
"Reading zip member '{}' in {}",
member_name,
path.display()
)));
}
}
}
}
}
let Some((member_data, pw)) = decrypted else {
if verbose {
eprintln!(
"skipping encrypted zip member '{}' in {}",
member_name,
path.display()
);
}
continue;
};
if verbose {
eprintln!(
"skipping encrypted zip member '{}' in {}",
entry.name(),
path.display()
" zip member: {} ({} bytes, decrypted)",
member_name,
member_data.len()
);
}
continue;
}
let member_name = entry.name().to_string();
let mut member_data = Vec::new();
entry.read_to_end(&mut member_data).with_context(|| {
format!("Reading zip member '{}' in {}", member_name, path.display())
})?;
if verbose {
eprintln!(
" zip member: {} ({} bytes)",
member_name,
member_data.len()
);
let logical_path = path.join(&member_name);
cands.push(Candidate {
path: logical_path,
data: member_data,
source: CandidateSource::Zip {
archive: path.clone(),
member: member_name,
password: Some(pw),
},
coverage: Coverage::default(),
});
} else {
let mut entry = archive.by_index(i).with_context(|| {
format!("Reading zip entry {} in {}", i, path.display())
})?;
let mut member_data = Vec::new();
entry.read_to_end(&mut member_data).with_context(|| {
format!("Reading zip member '{}' in {}", member_name, path.display())
})?;
if verbose {
eprintln!(
" zip member: {} ({} bytes)",
member_name,
member_data.len()
);
}
let logical_path = path.join(&member_name);
cands.push(Candidate {
path: logical_path,
data: member_data,
source: CandidateSource::Zip {
archive: path.clone(),
member: member_name,
password: None,
},
coverage: Coverage::default(),
});
}
let logical_path = path.join(&member_name);
cands.push(Candidate {
path: logical_path,
data: member_data,
source: CandidateSource::Zip {
archive: path.clone(),
member: member_name,
},
coverage: Coverage::default(),
});
}
} else {
// plain binary
Expand Down