-
Notifications
You must be signed in to change notification settings - Fork 39
fix: decode header values per-ecosystem (Python/JS) and expose raw header bytes #492
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
a139dc1
cbab1e3
7c52daf
5233130
db37ee1
0d0ec07
d236997
4a0e40e
1b52315
b54d354
750e31a
82d8756
6a40868
e8289e5
3bc991f
dd95522
b773362
f64e62b
deac557
060101a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,8 +5,12 @@ use tokio::sync::Mutex as AsyncMutex; | |
| use bytes::Bytes; | ||
| use encoding::label::encoding_from_whatwg_label; | ||
| use futures::{Stream, StreamExt}; | ||
| use impit::{errors::ImpitError, utils::ContentType}; | ||
| use impit::{ | ||
| errors::ImpitError, | ||
| utils::{decode_header_value, ContentType}, | ||
| }; | ||
| use pyo3::prelude::*; | ||
| use pyo3::types::PyBytes; | ||
| use reqwest::{Response, StatusCode, Version}; | ||
| use std::pin::Pin; | ||
|
|
||
|
|
@@ -223,6 +227,9 @@ pub struct ImpitPyResponse { | |
| content: Option<Vec<u8>>, | ||
| inner: Option<Response>, | ||
| inner_state: InnerResponseState, | ||
| // Raw, undecoded header name/value byte pairs (values exact; names lowercased, order not the | ||
| // original wire order - see the `raw_headers` getter docs). Exposed via the `raw_headers` getter. | ||
| raw_headers: Vec<(Vec<u8>, Vec<u8>)>, | ||
|
Comment on lines
+230
to
+232
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any chance we can follow the I suppose it would mean implementing the |
||
| } | ||
|
|
||
| #[pymethods] | ||
|
|
@@ -238,6 +245,12 @@ impl ImpitPyResponse { | |
| ) -> Self { | ||
| let headers = headers.unwrap_or_default(); | ||
|
|
||
| // No wire bytes for a manually constructed response; use the UTF-8 bytes of the strings. | ||
| let raw_headers: Vec<(Vec<u8>, Vec<u8>)> = headers | ||
| .iter() | ||
| .map(|(k, v)| (k.clone().into_bytes(), v.clone().into_bytes())) | ||
| .collect(); | ||
|
|
||
| let encoding = match headers | ||
| .iter() | ||
| .find(|(k, _)| k.to_lowercase() == "content-type") | ||
|
|
@@ -267,6 +280,7 @@ impl ImpitPyResponse { | |
| content: Some(content.unwrap_or_default()), | ||
| inner: None, | ||
| inner_state: InnerResponseState::Read, | ||
| raw_headers, | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -439,6 +453,21 @@ impl ImpitPyResponse { | |
| Ok(()) | ||
| } | ||
|
|
||
| /// Raw, undecoded header name/value pairs as `(bytes, bytes)`. Similar to httpx's | ||
| /// `Response.headers.raw`, but note two differences imposed by the underlying HTTP client: | ||
| /// header names are normalized to lowercase and the original wire order is not preserved | ||
| /// (duplicate values for a name are kept). Header *values* are the exact bytes received. | ||
| /// | ||
| /// Unlike `headers` (str values decoded UTF-8-first), this returns the exact value bytes, for | ||
| /// callers that need them - e.g. verifying a header signature/HMAC. | ||
| #[getter] | ||
| fn raw_headers<'py>(&self, py: Python<'py>) -> Vec<(Bound<'py, PyBytes>, Bound<'py, PyBytes>)> { | ||
| self.raw_headers | ||
| .iter() | ||
| .map(|(name, value)| (PyBytes::new(py, name), PyBytes::new(py, value))) | ||
| .collect() | ||
| } | ||
|
|
||
| #[getter] | ||
| fn content(&mut self, py: Python<'_>) -> PyResult<Vec<u8>> { | ||
| self.read(py) | ||
|
|
@@ -536,12 +565,18 @@ impl ImpitPyResponse { | |
| _ => "Unknown".to_string(), | ||
| }; | ||
| let is_redirect = val.status().is_redirection(); | ||
| let headers = HashMap::from_iter(val.headers().iter().map(|(k, v)| { | ||
| ( | ||
| k.as_str().to_string(), | ||
| v.as_bytes().iter().map(|&b| b as char).collect::<String>(), | ||
| ) | ||
| })); | ||
| // Python/httpx semantics: decode header values UTF-8-first with an ISO-8859-1 fallback. | ||
| let headers = HashMap::from_iter( | ||
| val.headers() | ||
| .iter() | ||
| .map(|(k, v)| (k.as_str().to_string(), decode_header_value(v.as_bytes()))), | ||
| ); | ||
| // Exact wire bytes for callers that need them (httpx `Headers.raw` equivalent). | ||
| let raw_headers: Vec<(Vec<u8>, Vec<u8>)> = val | ||
| .headers() | ||
| .iter() | ||
| .map(|(k, v)| (k.as_str().as_bytes().to_vec(), v.as_bytes().to_vec())) | ||
| .collect(); | ||
|
|
||
| let content_type_charset = headers | ||
| .get("content-type") | ||
|
|
@@ -597,6 +632,7 @@ impl ImpitPyResponse { | |
| is_stream_consumed, | ||
| inner_state, | ||
| inner, | ||
| raw_headers, | ||
| }) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This (ISO-8859-1 being a bijection) imo means we don't need the
rawHeadersfield inimpit-nodebindings.See that the
rawHeadersis not a part of thefetchinterfaceimpitis implementing. Adding more fields might bite us later with maintenance costs.