Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,7 @@ API reference
.. autoclass:: form2request.Request
:members:
:undoc-members:

.. autoclass:: form2request.FileField
:members:
:undoc-members:
48 changes: 42 additions & 6 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,8 @@ output to build requests with any HTTP client software. It also provides
<Response [200]>

:func:`~form2request.form2request` supports :ref:`user-defined form data
<data>`, :ref:`choosing a specific submit button (or none) <click>`, and
:ref:`overriding form attributes <override>`.

<data>`, :ref:`file uploads <uploads>`, :ref:`choosing a specific submit button
(or none) <click>`, and :ref:`overriding form attributes <override>`.

.. _form:

Expand Down Expand Up @@ -93,7 +92,6 @@ ML-based solution that can can automatically find a form of a specified type
:ref:`submit button <click>`. Its :ref:`formasaurus:usage` documentation
includes an example featuring form2request.


.. _data:

Setting form data
Expand Down Expand Up @@ -142,6 +140,46 @@ To remove a field value, set it to ``None``:
>>> form2request(form, {"foo": None})
Request(url='https://example.com', method='GET', headers=[], body=b'')

.. _uploads:

Uploading files
===============

Forms that upload files use ``enctype="multipart/form-data"``. Pass a
:class:`~form2request.FileField` instance as the value for any file input
field:

>>> from form2request import FileField, form2request
>>> html = b"""
... <form enctype="multipart/form-data" method="post">
... <input type="text" name="description" />
... <input type="file" name="attachment" />
... </form>"""
>>> selector = Selector(body=html, base_url="https://example.com")
>>> form = selector.css("form")
>>> request_data = form2request(form, {
... "description": "quarterly report",
... "attachment": FileField(
... content=b"col1,col2\n1,2\n",
... filename="report.csv",
... content_type="text/csv",
... ),
... })
>>> request_data.method
'POST'
>>> request_data.url
'https://example.com'
>>> request_data.headers[0][1].startswith("multipart/form-data")
True

The ``filename`` and ``content_type`` arguments of
:class:`~form2request.FileField` are optional. When omitted, ``filename``
defaults to an empty string, in which case no ``filename`` parameter is sent
for that part, and ``content_type`` defaults to
``application/octet-stream``.

For non-file fields, :func:`~form2request.form2request` handles encoding
automatically — regular text fields are sent as plain text parts within the
multipart body, without needing any special wrapping.

.. _click:

Expand Down Expand Up @@ -188,7 +226,6 @@ To change that, set ``click`` to the element that should be clicked:
>>> form2request(form, click=submit_baz)
Request(url='https://example.com?foo=baz', method='GET', headers=[], body=b'')


.. _override:

Overriding form attributes
Expand All @@ -202,7 +239,6 @@ You can override the method_ and enctype_ attributes of a form:
>>> form2request(form, method="POST", enctype="text/plain")
Request(url='https://example.com', method='POST', headers=[('Content-Type', 'text/plain')], body=b'foo=bar')


.. _request:

Using request data
Expand Down
3 changes: 2 additions & 1 deletion form2request/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Build HTTP requests out of HTML forms."""

from ._base import Request, form2request
from ._base import FileField, Request, form2request

__all__ = [
"FileField",
"Request",
"form2request",
]
107 changes: 71 additions & 36 deletions form2request/_base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import uuid
from collections.abc import Iterable
from dataclasses import dataclass
from typing import (
Expand All @@ -18,7 +19,17 @@
if TYPE_CHECKING:
from lxml.html import FormElement, HtmlElement

FormdataVType = Union[str, Iterable[str]]

@dataclass
class FileField:
    """A file upload value for use with multipart/form-data forms.

    Pass an instance as the value of a file input field in the form data
    given to :func:`form2request`; it is serialized as its own part of the
    multipart request body.
    """

    #: Raw bytes of the file, written verbatim as the body of the part.
    content: bytes
    #: File name reported in the ``Content-Disposition`` header of the part.
    #: When empty (the default), no ``filename`` parameter is emitted.
    filename: str = ""
    #: Value of the ``Content-Type`` header of the part.
    content_type: str = "application/octet-stream"


FormdataVType = Union[str, FileField, Iterable[str]]
FormdataKVType = tuple[str, FormdataVType]
FormdataType = Optional[Union[dict[str, FormdataVType], Iterable[FormdataKVType]]]

Expand All @@ -38,28 +49,21 @@ def _enctype(
) -> str:
if enctype:
enctype = enctype.lower()
if enctype not in {"application/x-www-form-urlencoded", "text/plain"}:
if enctype not in {
"application/x-www-form-urlencoded",
"text/plain",
"multipart/form-data",
}:
raise ValueError(
f"The specified form enctype ({enctype!r}) is not supported "
f"for forms with the POST method."
)
elif click_element is not None and (
enctype := (click_element.get("formenctype") or "").lower()
):
if enctype == "multipart/form-data":
raise NotImplementedError(
f"{click_element} has formenctype set to {enctype!r}, which "
f"form2request does not currently support for forms with the "
f"POST method."
)
elif (
enctype := (form.get("enctype") or "").lower()
) and enctype == "multipart/form-data":
raise NotImplementedError(
f"{form} has enctype set to {enctype!r}, which form2request does "
f"not currently support for forms with the POST method."
)
return enctype
click_element is not None
and (enctype := (click_element.get("formenctype") or "").lower())
) or (enctype := (form.get("enctype") or "").lower()):
pass
return enctype or ""


def _url(form: FormElement, click_element: HtmlElement | None) -> str:
Expand Down Expand Up @@ -131,7 +135,7 @@ def _click_element(

def _data(
form: FormElement, data: FormdataType, click_element: HtmlElement | None
) -> list[tuple[str, str]]:
) -> list[tuple[str, str | FileField]]:
data = data or {}
if click_element is not None and (name := click_element.get("name")):
click_data = (name, cast("str", click_element.get("value")))
Expand Down Expand Up @@ -171,10 +175,33 @@ def _data(
return [
(k, v)
for k, vs in values
for v in ([vs] if isinstance(vs, (str, bytes)) else vs)
for v in ([vs] if isinstance(vs, (str, bytes, FileField)) else vs)
]


def _quote_multipart_param(value: str) -> str:
    """Escape *value* for use inside a quoted ``Content-Disposition`` parameter."""
    # A raw double quote would end the parameter early and a raw CR/LF would
    # end the header line, letting a field name or file name inject headers or
    # whole parts. Percent-encode exactly these three characters, which is
    # what the HTML multipart/form-data encoding algorithm prescribes.
    return value.replace("\r", "%0D").replace("\n", "%0A").replace('"', "%22")


def _build_multipart_body(
    data: list[tuple[str, str | FileField]], boundary: str
) -> bytes:
    """Serialize *data* as a ``multipart/form-data`` request body.

    :param data: Field name and value pairs, in the order in which they must
        appear in the body. Text values are encoded as UTF-8; any other value
        is expected to be a :class:`FileField` and is sent with its own
        ``Content-Type`` header and, if set, its file name.
    :param boundary: Boundary string separating the parts. It is used as is,
        so it must match the boundary advertised in the ``Content-Type``
        header of the request.
    :return: The encoded body, terminated by the closing boundary line. With
        no fields, the body is only that closing line.
    """
    delimiter = f"--{boundary}\r\n".encode()
    parts = []
    for name, value in data:
        disposition = (
            f'Content-Disposition: form-data; name="{_quote_multipart_param(name)}"'
        )
        if isinstance(value, str):
            part_headers = f"{disposition}\r\n\r\n"
            payload = value.encode()
        elif isinstance(value, bytes):
            # Scalar bytes values can be passed through as form data; they
            # are already encoded, so send them untouched instead of failing
            # on a missing ``encode`` method.
            part_headers = f"{disposition}\r\n\r\n"
            payload = value
        else:
            # File upload: only advertise a file name when one was given.
            if value.filename:
                disposition += (
                    f'; filename="{_quote_multipart_param(value.filename)}"'
                )
            part_headers = (
                f"{disposition}\r\nContent-Type: {value.content_type}\r\n\r\n"
            )
            payload = value.content
        parts.append(delimiter + part_headers.encode() + payload + b"\r\n")
    parts.append(f"--{boundary}--\r\n".encode())
    return b"".join(parts)


@dataclass
class Request:
"""HTTP request data."""
Expand Down Expand Up @@ -218,11 +245,9 @@ def to_requests(self, **kwargs: Any):
return request.prepare()

def to_scrapy(self, callback: Callable, **kwargs: Any):
"""Convert the request to :class:`scrapy.Request
<scrapy.http.Request>`.
"""Convert the request to :class:`scrapy.Request`.

All *kwargs* are passed to :class:`scrapy.Request
<scrapy.http.Request>` as is.
All *kwargs* are passed to :class:`scrapy.Request` as is.
"""
import scrapy

Expand Down Expand Up @@ -282,23 +307,33 @@ def form2request(
click_element = _click_element(form_el, click)
url = _url(form_el, click_element)
method = _method(form_el, click_element, method)
headers = []
body = ""
data = _data(form_el, data, click_element)
if method == "GET":
url = urlunsplit(urlsplit(url)._replace(query=urlencode(data, doseq=True)))
else:
assert method == "POST"
enctype = _enctype(form_el, click_element, enctype)
if enctype == "text/plain":
headers = [("Content-Type", "text/plain")]
body = "\n".join(f"{k}={v}" for k, v in data)
else:
headers = [("Content-Type", "application/x-www-form-urlencoded")]
body = urlencode(data, doseq=True)
return Request(url=url, method=method, headers=[], body=b"")
assert method == "POST"
enctype = _enctype(form_el, click_element, enctype)
if enctype == "multipart/form-data":
boundary = uuid.uuid4().hex
headers = [("Content-Type", f'multipart/form-data; boundary="{boundary}"')]
return Request(
url=url,
method=method,
headers=headers,
body=_build_multipart_body(data, boundary),
)
if enctype == "text/plain":
body = "\n".join(f"{k}={v}" for k, v in data)
return Request(
url=url,
method=method,
headers=[("Content-Type", "text/plain")],
body=body.encode(),
)
body = urlencode(data, doseq=True)
return Request(
url=url,
method=method,
headers=headers,
headers=[("Content-Type", "application/x-www-form-urlencoded")],
body=body.encode(),
)
Loading
Loading