Skip to content

Commit 8260d28

Browse files
authored
Extract and scan strings from unknown archive types (#467)
* Add string extraction functionality for unknown archive types, and scan these for secrets
1 parent c5e09e5 commit 8260d28

6 files changed

Lines changed: 1092 additions & 1 deletion

File tree

pkg/archive/strings.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
package archive
2+
3+
import (
4+
"bytes"
5+
)
6+
7+
const (
	// MinStringLength is the default minimum number of consecutive printable
	// bytes a run must contain before it is reported as a string.
	// This matches the default behavior of the Unix strings command.
	MinStringLength = 4
)
12+
13+
// ExtractPrintableStrings extracts printable ASCII strings from binary data.
14+
// It works similarly to the Unix 'strings' command, extracting sequences of printable
15+
// ASCII characters (and tabs, newlines, carriage returns) that are at least minLength
16+
// characters long. If minLength is 0, MinStringLength (4) is used as default.
17+
// Each extracted string is output on its own line, separated by newlines.
18+
func ExtractPrintableStrings(data []byte, minLength int) []byte {
19+
if minLength <= 0 {
20+
minLength = MinStringLength
21+
}
22+
23+
var result bytes.Buffer
24+
var currentString bytes.Buffer
25+
26+
for i := 0; i < len(data); i++ {
27+
b := data[i]
28+
29+
if isPrintableByte(b) {
30+
currentString.WriteByte(b)
31+
} else {
32+
if currentString.Len() >= minLength {
33+
result.Write(currentString.Bytes())
34+
result.WriteByte('\n')
35+
}
36+
currentString.Reset()
37+
}
38+
}
39+
40+
if currentString.Len() >= minLength {
41+
result.Write(currentString.Bytes())
42+
result.WriteByte('\n')
43+
}
44+
45+
return result.Bytes()
46+
}
47+
48+
// isPrintableByte reports whether b is a printable ASCII character.
// The accepted set is the ASCII printable range (space through tilde,
// 32-126) plus tab, newline, and carriage return.
// The check is done on raw bytes rather than UTF-8 runes, so every byte of a
// multi-byte UTF-8 sequence (all >= 0x80) is treated as non-printable.
func isPrintableByte(b byte) bool {
	switch b {
	case '\t', '\n', '\r':
		// Whitespace control characters are accepted explicitly.
		return true
	}

	// ASCII printable characters, written as literals rather than 32 and 126.
	return b >= ' ' && b <= '~'
}

0 commit comments

Comments
 (0)