@@ -71,23 +71,59 @@ def _get_cached_flow(fid: int) -> OpenMLFlow:
7171
7272@openml .utils .thread_safe_if_oslo_installed
7373def get_flow (flow_id : int , reinstantiate : bool = False , strict_version : bool = True ) -> OpenMLFlow : # noqa: FBT002
74- """Download the OpenML flow for a given flow ID.
74+ """Fetch an OpenMLFlow by its server-assigned ID.
75+
76+ Queries the OpenML REST API for the flow metadata and returns an
77+ :class:`OpenMLFlow` instance. If the flow is already cached locally,
78+ the cached copy is returned. Optionally the flow can be re-instantiated
79+ into a concrete model instance using the registered extension.
7580
7681 Parameters
7782 ----------
7883 flow_id : int
7984 The OpenML flow id.
80-
81- reinstantiate: bool
82- Whether to reinstantiate the flow to a model instance.
83-
84- strict_version : bool, default=True
85- Whether to fail if version requirements are not fulfilled.
85+ reinstantiate : bool, optional (default=False)
86+ If True, convert the flow description into a concrete model instance
87+ using the flow's extension (e.g., sklearn). If conversion fails and
88+ ``strict_version`` is True, an exception will be raised.
89+ strict_version : bool, optional (default=True)
90+ When ``reinstantiate`` is True, whether to enforce exact version
91+ requirements for the extension/model. If False, a new flow may
92+ be returned when versions differ.
8693
8794 Returns
8895 -------
89- flow : OpenMLFlow
90- the flow
96+ OpenMLFlow
97+ The flow object with metadata; ``model`` may be populated when
98+ ``reinstantiate=True``.
99+
100+ Raises
101+ ------
102+ OpenMLCacheException
103+ When cached flow files are corrupted or cannot be read.
104+ OpenMLServerException
105+ When the REST API call fails.
106+
107+ Side Effects
108+ ------------
109+ - Writes to ``openml.config.cache_directory/flows/{flow_id}/flow.xml``
110+ when the flow is downloaded from the server.
111+
112+ Preconditions
113+ -------------
114+ - Network access to the OpenML server is required unless the flow is cached.
115+ - For private flows, ``openml.config.apikey`` must be set.
116+
117+ Notes
118+ -----
119+ Results are cached to speed up subsequent calls. When ``reinstantiate`` is
120+ True and version mismatches occur, a new flow may be returned to reflect
121+ the converted model (only when ``strict_version`` is False).
122+
123+ Examples
124+ --------
125+ >>> import openml
126+ >>> flow = openml.flows.get_flow(5) # doctest: +SKIP
91127 """
92128 flow_id = int (flow_id )
93129 flow = _get_flow_description (flow_id )
@@ -138,32 +174,47 @@ def list_flows(
138174 tag : str | None = None ,
139175 uploader : str | None = None ,
140176) -> pd .DataFrame :
141- """
142- Return a list of all flows which are on OpenML.
143- (Supports large amount of results)
177+ """List flows available on the OpenML server.
178+
179+ This function supports paging and filtering and returns a pandas
180+ DataFrame with one row per flow and columns for id, name, version,
181+ external_version, full_name and uploader.
144182
145183 Parameters
146184 ----------
147185 offset : int, optional
148- the number of flows to skip, starting from the first
186+ Number of flows to skip, starting from the first (for paging).
149187 size : int, optional
150- the maximum number of flows to return
188+ Maximum number of flows to return.
151189 tag : str, optional
152- the tag to include
153- kwargs: dict , optional
154- Legal filter operators: uploader .
190+ Only return flows having this tag.
191+ uploader : str , optional
192+ Only return flows uploaded by this user .
155193
156194 Returns
157195 -------
158- flows : dataframe
159- Each row maps to a dataset
160- Each column contains the following information:
161- - flow id
162- - full name
163- - name
164- - version
165- - external version
166- - uploader
196+ pandas.DataFrame
197+ Rows correspond to flows. Columns include ``id``, ``full_name``,
198+ ``name``, ``version``, ``external_version``, and ``uploader``.
199+
200+ Raises
201+ ------
202+ OpenMLServerException
203+ When the API call fails.
204+
205+ Side Effects
206+ ------------
207+ - None. This is a read-only operation: results are fetched and returned.
208+
209+ Preconditions
210+ -------------
211+ - Network access is required to list flows unless caching mechanisms are
212+ used by the underlying API helper.
213+
214+ Examples
215+ --------
216+ >>> import openml
217+ >>> flows = openml.flows.list_flows(size=100) # doctest: +SKIP
167218 """
168219 listing_call = partial (_list_flows , tag = tag , uploader = uploader )
169220 batches = openml .utils ._list_all (listing_call , offset = offset , limit = size )
@@ -206,25 +257,35 @@ def _list_flows(limit: int, offset: int, **kwargs: Any) -> pd.DataFrame:
206257
207258
208259def flow_exists (name : str , external_version : str ) -> int | bool :
209- """Retrieves the flow id .
260+ """Check whether a flow (name + external_version) exists on the server .
210261
211- A flow is uniquely identified by name + external_version.
262+ The OpenML server defines uniqueness of flows by the pair
263+ ``(name, external_version)``. This helper queries the server and
264+ returns the corresponding flow id when present.
212265
213266 Parameters
214267 ----------
215- name : string
216- Name of the flow
217- external_version : string
268+ name : str
269+ Flow name (e.g., ``sklearn.tree._classes.DecisionTreeClassifier(1)``).
270+ external_version : str
218271 Version information associated with flow.
219272
220273 Returns
221274 -------
222- flow_exist : int or bool
223- flow id iff exists, False otherwise
224-
225- Notes
226- -----
227- see https://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version
275+ int or bool
276+ The flow id if the flow exists on the server, otherwise ``False``.
277+
278+ Raises
279+ ------
280+ ValueError
281+ If ``name`` or ``external_version`` is empty or not a string.
282+ OpenMLServerException
283+ When the API request fails.
284+
285+ Examples
286+ --------
287+ >>> import openml
288+ >>> openml.flows.flow_exists("weka.JRip", "Weka_3.9.0_10153") # doctest: +SKIP
228289 """
229290 if not (isinstance (name , str ) and len (name ) > 0 ):
230291 raise ValueError ("Argument 'name' should be a non-empty string" )
@@ -247,35 +308,58 @@ def get_flow_id(
247308 name : str | None = None ,
248309 exact_version : bool = True , # noqa: FBT002
249310) -> int | bool | list [int ]:
250- """Retrieves the flow id for a model or a flow name.
311+ """Retrieve flow id(s) for a model instance or a flow name.
251312
252- Provide either a model or a name to this function. Depending on the input, it does
313+ Provide either a concrete ``model`` (which will be converted to a flow by
314+ the appropriate extension) or a flow ``name``. Behavior depends on
315+ ``exact_version``:
253316
254- * ``model`` and ``exact_version == True``: This helper function first queries for the necessary
255- extension. Second, it uses that extension to convert the model into a flow. Third, it
256- executes ``flow_exists`` to potentially obtain the flow id the flow is published to the
257- server.
258- * ``model`` and ``exact_version == False``: This helper function first queries for the
259- necessary extension. Second, it uses that extension to convert the model into a flow. Third
260- it calls ``list_flows`` and filters the returned values based on the flow name.
261- * ``name``: Ignores ``exact_version`` and calls ``list_flows``, then filters the returned
262- values based on the flow name.
317+ - ``model`` + ``exact_version=True``: convert ``model`` to a flow and call
318+ :func:`flow_exists` to get a single flow id (or False).
319+ - ``model`` + ``exact_version=False``: convert ``model`` to a flow and
320+ return all server flow ids with the same flow name.
321+ - ``name``: ignore ``exact_version`` and return all server flow ids that
322+ match ``name``.
263323
264324 Parameters
265325 ----------
266- model : object
267- Any model. Must provide either `` model`` or ``name``.
268- name : str
269- Name of the flow. Must provide either ``model`` or ``name``.
270- exact_version : bool
271- Whether to return the flow id of the exact version or all flow ids where the name
272- of the flow matches. This is only taken into account for a model where a version number
273- is available (requires ``model`` to be set) .
326+ model : object, optional
327+ A model instance that can be handled by a registered extension. Either
328+ ``model`` or `` name`` must be provided.
329+ name : str, optional
330+ Flow name to query for. Either ``model`` or ``name`` must be provided.
331+ exact_version : bool, optional (default=True)
332+ When True and ``model`` is provided, only return the id for the exact
333+ external version. When False, return a list of matching ids .
274334
275335 Returns
276336 -------
277- int or bool, List
278- flow id iff exists, ``False`` otherwise, List if ``exact_version is False``
337+ int or bool or list[int]
338+ With ``model`` and ``exact_version=True``: the flow id if found, otherwise ``False``.
339+ With ``name``, or with ``exact_version=False``: a list of matching flow ids (may be empty).
340+
341+ Raises
342+ ------
343+ ValueError
344+ If neither ``model`` nor ``name`` is provided, or if both are provided.
345+ OpenMLServerException
346+ If underlying API calls fail.
347+
348+ Side Effects
349+ ------------
350+ - May call server APIs (``flow/exists``, ``flow/list``) and therefore
351+ depends on network access and API keys for private flows.
352+
353+ Examples
354+ --------
355+ >>> import openml
356+ >>> # Lookup by flow name
357+ >>> openml.flows.get_flow_id(name="weka.JRip") # doctest: +SKIP
358+ >>> # Lookup by model instance (requires a registered extension)
359+ >>> import sklearn
360+ >>> import openml_sklearn
361+ >>> clf = sklearn.tree.DecisionTreeClassifier()
362+ >>> openml.flows.get_flow_id(model=clf) # doctest: +SKIP
279363 """
280364 if model is not None and name is not None :
281365 raise ValueError ("Must provide either argument `model` or argument `name`, but not both." )
@@ -391,6 +475,21 @@ def assert_flows_equal( # noqa: C901, PLR0912, PLR0913, PLR0915
391475
392476 check_description : bool
393477 Whether to ignore matching of flow descriptions.
478+
479+ Raises
480+ ------
481+ TypeError
482+ When either argument is not an :class:`OpenMLFlow`.
483+ ValueError
484+ When a relevant mismatch is found between the two flows.
485+
486+ Examples
487+ --------
488+ >>> import openml
489+ >>> f1 = openml.flows.get_flow(5) # doctest: +SKIP
490+ >>> f2 = openml.flows.get_flow(5) # doctest: +SKIP
491+ >>> openml.flows.assert_flows_equal(f1, f2) # doctest: +SKIP
492+ >>> # If flows differ, a ValueError is raised
394493 """
395494 if not isinstance (flow1 , OpenMLFlow ):
396495 raise TypeError (f"Argument 1 must be of type OpenMLFlow, but is { type (flow1 )} " )
@@ -550,5 +649,20 @@ def delete_flow(flow_id: int) -> bool:
550649 -------
551650 bool
552651 True if the deletion was successful. False otherwise.
652+
653+ Raises
654+ ------
655+ OpenMLServerException
656+ If the server-side deletion fails due to permissions or other errors.
657+
658+ Side Effects
659+ ------------
660+ - Removes the flow from the OpenML server (if permitted).
661+
662+ Examples
663+ --------
664+ >>> import openml
665+ >>> # Deletes flow 23 if you are the uploader and it's not linked to runs
666+ >>> openml.flows.delete_flow(23) # doctest: +SKIP
553667 """
554668 return openml .utils ._delete_entity ("flow" , flow_id )
0 commit comments