Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## Unreleased

- Add optional per-thread LRU cache for JA4 outputs (default 4096 entries).

## 1.0.0 (2026-02-12)

- Add optional raw fingerprint outputs.
Expand Down
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,23 @@ If you need the raw outputs (see [JA4 specification](#ja4-specification)), pass
lua-load-per-thread /etc/haproxy/ja4.lua raw
```

`ja4.lua` also supports an optional cache size argument (number of entries):

```haproxy
# 8192-entry LRU cache per thread
lua-load-per-thread /etc/haproxy/ja4.lua 8192

# raw mode + 8192-entry LRU cache per thread
lua-load-per-thread /etc/haproxy/ja4.lua raw 8192
```

Notes:

- Default cache size is `4096` entries.
- Set cache size to `0` to disable caching.
- The cache key is built from the exact TLS inputs used to compute JA4, so handshakes with identical inputs reuse the cached output.
- With `lua-load-per-thread`, caches are **not shared across threads** (one cache per HAProxy thread).

### TCP mode

<details>
Expand Down
208 changes: 194 additions & 14 deletions ja4.lua
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,146 @@ end

-- Compute optional fingerprint variants: ja4_r, ja4_o, ja4_ro
local args = table.pack(...)

-- Parse the script arguments that follow the file name on the
-- `lua-load-per-thread` line. Arguments may appear in any order:
--
--   "raw"      enables the optional ja4_r / ja4_o / ja4_ro outputs
--   <number>   LRU cache capacity in entries; fractional values are floored
--              and negative values are clamped to 0 (0 disables the cache)
--
-- Anything else is ignored. If several numbers are given, the last one wins.
--
-- `list` is a table shaped like the result of table.pack (uses `list.n`).
-- Returns two values: raw_outputs (boolean) and cache_capacity (number).
local function parse_load_args(list)
    local raw_outputs = false
    local cache_capacity = 4096

    for i = 1, list.n do
        local value = list[i]
        if value == "raw" then
            raw_outputs = true
        else
            local n = tonumber(value)
            -- tonumber() turns overflowing literals such as "1e999" into
            -- +inf, which would survive floor/max and yield a cache that
            -- never evicts. Skip non-finite sizes and keep the current value.
            if n and n == n and n < math.huge then
                cache_capacity = math.max(0, math.floor(n))
            end
        end
    end

    return raw_outputs, cache_capacity
end

local RAW_OUTPUTS, CACHE_CAPACITY = parse_load_args(args)

-- A capacity of 0 turns caching off entirely.
local CACHE_ENABLED = CACHE_CAPACITY > 0

-- Per-thread LRU cache. With `lua-load-per-thread`, each HAProxy thread gets
-- its own Lua VM and therefore its own cache instance.
--
-- LRU shape (doubly-linked list + hash table):
--
-- CACHE[key] -> node { key, value, prev, next }
--
-- CACHE_HEAD (most recently used) CACHE_TAIL (least recently used)
-- | |
-- v v
-- [ MRU ] <-> [ ... ] <-> [ ... ] <-> [ LRU ]
--
-- Access flow:
-- 1) cache_get(key): node exists -> move node to head (MRU)
-- 2) cache_put(new): insert node at head
-- 3) if size > capacity: evict tail node (LRU)
--
-- This gives O(1) average lookup/update/eviction and favors repeated
-- handshakes from similar clients.
-- Lookup table: cache key -> list node ({ key, value, prev, next }).
local CACHE = {}

-- Number of nodes currently held in the cache.
local CACHE_SIZE = 0

-- Ends of the recency list: head is the most recently used node, tail the
-- least recently used. Both are nil while the list is empty.
local CACHE_HEAD, CACHE_TAIL = nil, nil

-- Unlink `node` from the recency list, repairing its neighbours and the
-- CACHE_HEAD / CACHE_TAIL pointers. The node's own links are cleared.
-- The CACHE table and CACHE_SIZE are left untouched.
local function lru_detach(node)
    local before, after = node.prev, node.next

    if not before then
        -- node was the head: its successor becomes the new head
        CACHE_HEAD = after
    else
        before.next = after
    end

    if not after then
        -- node was the tail: its predecessor becomes the new tail
        CACHE_TAIL = before
    else
        after.prev = before
    end

    node.prev, node.next = nil, nil
end

-- Link `node` in front of the current head, making it the most recently
-- used entry. If the list was empty, the node also becomes the tail.
local function lru_insert_head(node)
    local old_head = CACHE_HEAD

    node.prev, node.next = nil, old_head

    if not old_head then
        -- first node in the list
        CACHE_TAIL = node
    else
        old_head.prev = node
    end

    CACHE_HEAD = node
end

-- Look up `key` in the cache. On a hit the node is promoted to the head of
-- the recency list and its stored value is returned; on a miss nil is
-- returned.
local function cache_get(key)
    local entry = CACHE[key]

    if entry then
        -- Already-most-recent entries need no relinking.
        if entry ~= CACHE_HEAD then
            lru_detach(entry)
            lru_insert_head(entry)
        end
        return entry.value
    end

    return nil
end

-- Store `value` under `key` and mark it as most recently used.
-- An existing entry is updated in place; a new entry is added at the head
-- and, once the size exceeds CACHE_CAPACITY, the tail (least recently used)
-- entry is evicted.
local function cache_put(key, value)
    local entry = CACHE[key]

    if not entry then
        entry = { key = key, value = value }
        CACHE[key] = entry
        lru_insert_head(entry)
        CACHE_SIZE = CACHE_SIZE + 1

        if CACHE_SIZE > CACHE_CAPACITY then
            local oldest = CACHE_TAIL
            if oldest then
                lru_detach(oldest)
                CACHE[oldest.key] = nil
                CACHE_SIZE = CACHE_SIZE - 1
            end
        end
        return
    end

    -- Key already cached: refresh the value and promote the node.
    entry.value = value
    if entry ~= CACHE_HEAD then
        lru_detach(entry)
        lru_insert_head(entry)
    end
end

-- Serialise one cache-key component as a 4-byte big-endian length followed
-- by the bytes themselves. nil is encoded as the length marker 0xFFFFFFFF
-- with no payload, which keeps it distinct from the empty string (length 0).
local function pack_cache_string(value)
    if value ~= nil then
        local length_prefix = string.pack(">I4", #value)
        return length_prefix .. value
    end

    return string.pack(">I4", 0xFFFFFFFF)
end

-- Build the cache key from the inputs passed in by the caller.
-- Layout: "<protocol_id or -1>|<1 if has_sni is true or 1, else 0>|" followed
-- by the six remaining inputs, each encoded with pack_cache_string.
local function make_cache_key(
    protocol_id,
    has_sni,
    http_version,
    supported_versions,
    alpn_value,
    cipher_bin,
    ext_bin,
    sigalg_bin
)
    -- has_sni may arrive as a boolean or as an integer; only `true` and `1`
    -- count as "SNI present".
    local sni_flag = 0
    if has_sni == true or has_sni == 1 then
        sni_flag = 1
    end

    local header = tostring(protocol_id or -1) .. "|" .. tostring(sni_flag) .. "|"

    return header
        .. pack_cache_string(http_version)
        .. pack_cache_string(supported_versions)
        .. pack_cache_string(alpn_value)
        .. pack_cache_string(cipher_bin)
        .. pack_cache_string(ext_bin)
        .. pack_cache_string(sigalg_bin)
end

local function sha256_truncated(c, input)
-- lower() is necessary as txn.c:hex returns uppercase characters
Expand All @@ -95,16 +234,48 @@ end
-- Benchmarks showed that keeping logic in one function (instead of splitting
-- up into helper functions) improved throughput by ~7-10%.
local function _ja4(txn)
-- Fetch all TLS inputs once.
local protocol_id = txn.f:ssl_fc_protocol_hello_id()
local has_sni = txn.f:ssl_fc_has_sni()
local http_version = txn.f:req_ver()
local supported_versions = txn.f:ssl_fc_supported_versions_bin(1)
local alpn_value = txn.f:ssl_fc_alpn()
local cipher_bin = txn.f:ssl_fc_cipherlist_bin(1)
local ext_bin = txn.f:ssl_fc_extlist_bin(1)
local sigalg_bin = txn.f:ssl_fc_sigalgs_bin(1)

local cache_key
if CACHE_ENABLED then
cache_key = make_cache_key(
protocol_id,
has_sni,
http_version,
supported_versions,
alpn_value,
cipher_bin,
ext_bin,
sigalg_bin
)
local cached = cache_get(cache_key)
if cached then
txn:set_var("txn.ja4", cached.ja4)
if RAW_OUTPUTS then
txn:set_var("txn.ja4_r", cached.ja4_r)
txn:set_var("txn.ja4_o", cached.ja4_o)
txn:set_var("txn.ja4_ro", cached.ja4_ro)
end
return
end
end

--------------------------------------------
-- Detect protocol (t=TLS, d=DTLS, q=QUIC)
--------------------------------------------
local protocol_id = txn.f:ssl_fc_protocol_hello_id()
local is_dtls = DTLS_VERSIONS[protocol_id]
local protocol
if is_dtls then
protocol = "d"
else
local http_version = txn.f:req_ver()
if http_version and string.sub(http_version, 1, 1) == "3" then
protocol = "q"
else
Expand All @@ -116,7 +287,6 @@ local function _ja4(txn)
-- Detect TLS version from supported_versions (fallback to protocol_id)
-------------------------------------------------------------------------
local version
local supported_versions = txn.f:ssl_fc_supported_versions_bin(1)
if supported_versions and #supported_versions >= 2 then
local newest_version = nil
for i = 1, #supported_versions - 1, 2 do
Expand Down Expand Up @@ -144,7 +314,6 @@ local function _ja4(txn)
-- Extract ALPN (first and last char of negotiated protocol)
--------------------------------------------------------------
local alpn
local alpn_value = txn.f:ssl_fc_alpn()
if not alpn_value or alpn_value == "" then
alpn = "00"
else
Expand All @@ -167,7 +336,6 @@ local function _ja4(txn)
------------------
local ciphers = {}
local cipher_list_orig = ""
local cipher_bin = txn.f:ssl_fc_cipherlist_bin(1)
if cipher_bin and #cipher_bin >= 2 then
local count = 0
for i = 1, #cipher_bin - 1, 2 do
Expand All @@ -191,7 +359,6 @@ local function _ja4(txn)
extensions_orig = {}
end
local extension_count = 0
local ext_bin = txn.f:ssl_fc_extlist_bin(1)
if ext_bin and #ext_bin >= 2 then
local count = 0
for i = 1, #ext_bin - 1, 2 do
Expand All @@ -213,7 +380,6 @@ local function _ja4(txn)
-- Parse signature algorithms
-------------------------------
local signature_algorithms = {}
local sigalg_bin = txn.f:ssl_fc_sigalgs_bin(1)
if sigalg_bin and #sigalg_bin >= 2 then
local count = 0
for i = 1, #sigalg_bin - 1, 2 do
Expand All @@ -234,8 +400,6 @@ local function _ja4(txn)
-----------------------------------
-- Prepare JA4_a (various fields)
-----------------------------------
local has_sni = txn.f:ssl_fc_has_sni()

-- By default, Haproxy converts bools to ints when passing them to Lua.
-- One Lua gotcha is that 0 is truthy! HAProxy 3.1 or later can be told
-- to return bools by setting `tune.lua.bool-sample-conversion normal`.
Expand Down Expand Up @@ -301,11 +465,27 @@ local function _ja4(txn)
-------------------------------------
-- Make accessible to haproxy rules
-------------------------------------
txn:set_var("txn.ja4", fingerprint_prefix .. "_" .. cipher_hash .. "_" .. extension_hash)
local ja4_value = fingerprint_prefix .. "_" .. cipher_hash .. "_" .. extension_hash
txn:set_var("txn.ja4", ja4_value)

local ja4_r_value, ja4_o_value, ja4_ro_value
if RAW_OUTPUTS then
txn:set_var("txn.ja4_r", fingerprint_prefix .. "_" .. cipher_list .. "_" .. ja4_c_r)
txn:set_var("txn.ja4_o", fingerprint_prefix .. "_" .. cipher_hash_orig .. "_" .. ja4_c_o)
txn:set_var("txn.ja4_ro", fingerprint_prefix .. "_" .. cipher_list_orig .. "_" .. ja4_c_ro)
ja4_r_value = fingerprint_prefix .. "_" .. cipher_list .. "_" .. ja4_c_r
ja4_o_value = fingerprint_prefix .. "_" .. cipher_hash_orig .. "_" .. ja4_c_o
ja4_ro_value = fingerprint_prefix .. "_" .. cipher_list_orig .. "_" .. ja4_c_ro

txn:set_var("txn.ja4_r", ja4_r_value)
txn:set_var("txn.ja4_o", ja4_o_value)
txn:set_var("txn.ja4_ro", ja4_ro_value)
end

if CACHE_ENABLED and cache_key then
cache_put(cache_key, {
ja4 = ja4_value,
ja4_r = ja4_r_value,
ja4_o = ja4_o_value,
ja4_ro = ja4_ro_value,
})
end
end

Expand Down
60 changes: 60 additions & 0 deletions tests/spec/ja4_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,66 @@ describe("JA4 fingerprinting", function()
end)
end)

describe("cache", function()
    -- Replace txn.c.digest with a wrapper that bumps `counter.calls` on every
    -- invocation and then delegates to the original digest function.
    local function count_digest_calls(txn, counter)
        local real_digest = txn.c.digest
        txn.c.digest = function(self, data, algorithm)
            counter.calls = counter.calls + 1
            return real_digest(self, data, algorithm)
        end
    end

    it("reuses result for identical input", function()
        local counter = { calls = 0 }
        local txn = mock_txn.create({
            cipherlist_bin = "\x00\x2f\x00\x35",
            extlist_bin = "\x00\x05\x00\x0a",
            sigalgs_bin = "\x04\x03\x05\x03",
            has_sni = true,
            alpn = "h2",
        })
        count_digest_calls(txn, counter)

        -- First run computes the fingerprint.
        ja4(txn)
        assert.equal(2, counter.calls)

        -- Second run with the same input must not hash again.
        ja4(txn)
        assert.equal(2, counter.calls)
    end)

    it("does not reuse result when input changes", function()
        -- One counter shared by both transactions.
        local counter = { calls = 0 }

        local txn1 = mock_txn.create({
            cipherlist_bin = "\x13\x01\x13\x02",
            extlist_bin = "\x44\x69\xff\x01",
            sigalgs_bin = "\x08\x04\x08\x05",
            has_sni = true,
            alpn = "aa",
        })
        count_digest_calls(txn1, counter)
        ja4(txn1)

        -- Same as txn1 except for the ALPN value.
        local txn2 = mock_txn.create({
            cipherlist_bin = "\x13\x01\x13\x02",
            extlist_bin = "\x44\x69\xff\x01",
            sigalgs_bin = "\x08\x04\x08\x05",
            has_sni = true,
            alpn = "ab",
        })
        count_digest_calls(txn2, counter)
        ja4(txn2)

        assert.equal(4, counter.calls)
    end)
end)

-- HAProxy catches Lua errors and continues processing (returns ACT_RET_CONT),
-- so errors don't cause HTTP 500 responses unless a subsequent rule depends on
-- any output vars being non-nil. In our case, we ensure that output vars are
Expand Down