Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,71 @@ def find_nearest(
stages.FindNearest(field, vector, distance_measure, options)
)

def literals(self, *documents: str | Selectable) -> "_BasePipeline":
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The type hint for *documents is incomplete. It should include dict as documents are often passed as dictionaries, which is not covered by str | Selectable.

Suggested change
def literals(self, *documents: str | Selectable) -> "_BasePipeline":
def literals(self, *documents: dict | str | Selectable) -> "_BasePipeline":

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can probably ignore this, unless that's how other languages handle it

Copy link
Copy Markdown
Contributor

@daniel-sanche daniel-sanche Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, looking at the Go implementation, it seems like it accepts dicts, but not strings?

I don't know much about this stage, but from what I've seen, it's supposed to deal with maps. So maybe this should be def literals(self, *documents: Map | dict[str, CONSTANT_TYPE] | Selectable)?

Copy link
Copy Markdown
Contributor Author

@Linchin Linchin Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On further thought, I think it should be `def literals(self, *documents: dict | Expression)`. In that case both Constant and Map are covered, since they are child classes of Expression.

"""
Returns documents from a fixed set of predefined document objects.

This stage is commonly used for testing other stages in isolation,
though it can also be used as inputs to join conditions.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to conflict with the statement later:

The literals(...) stage can only be used as the first stage in a pipeline (or sub-pipeline)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


Example:
>>> from google.cloud.firestore_v1.pipeline_expressions import Constant
>>> documents = [
... {"name": "joe", "age": 10},
... {"name": "bob", "age": 30},
... {"name": "alice", "age": 40}
... ]
>>> pipeline = client.pipeline()
... .literals(Constant.of(documents))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at the code, it seems like:

  1. Constant isn't a Selectable
  2. Constant doesn't seem like it supports dict types. (We do have a Map, which serves that purpose, but it doesn't seem Selectable either)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for catching this! I spent some time checking the internal docs, and I think the Expression class and dict should be supported, per the following language:

            While literal values are the most common, it is also possible to pass in
            expressions, which will be evaluated and returned, making it possible to test
            out different query / expression behavior without first needing to create some
            test data.

... .where(Field.of("age").less_than(35))

Output documents:
```json
[
{"name": "joe", "age": 10},
{"name": "bob", "age": 30}
]
```

Behavior:
The `literals(...)` stage can only be used as the first stage in a pipeline (or
sub-pipeline). The order of documents returned from the `literals` matches the
Comment thread
Linchin marked this conversation as resolved.
Outdated
order in which they are defined.

While literal values are the most common, it is also possible to pass in
expressions, which will be evaluated and returned, making it possible to test
out different query / expression behavior without first needing to create some
test data.

For example, the following shows how to quickly test out the `char_length(...)`
function on some constant test sets:

Example:
>>> from google.cloud.firestore_v1.pipeline_expressions import Constant
>>> documents = [
... {"x": Constant.of("foo-bar-baz").char_length()},
... {"x": Constant.of("bar").char_length()}
... ]
>>> pipeline = client.pipeline().literals(Constant.of(documents))

Output documents:
```json
[
{"x": 11},
{"x": 3}
]
```

Args:
documents: A `str` or `Selectable` expression. If a `str`, it's
treated as a field path to an array of documents.
If a `Selectable`, it's usually a `Constant`
containing an array of documents (as dictionaries).
Comment thread
Linchin marked this conversation as resolved.
Outdated
Returns:
A new Pipeline object with this stage appended to the stage list.
"""
return self._append(stages.Literals(*documents))

def replace_with(
self,
field: Selectable,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,23 @@ def _pb_args(self):
return [Value(integer_value=self.limit)]


class Literals(Stage):
    """Pipeline stage that returns a fixed, caller-supplied set of documents."""

    def __init__(self, *documents: str | Selectable):
        """Register the stage under the name ``"literals"``.

        Args:
            *documents: The documents handed to the stage. They are kept
                as a tuple, in the order they were passed.
        """
        super().__init__("literals")
        self.documents = documents

    def _pb_args(self):
        """Build the stage arguments, one entry per stored document.

        Returns:
            A list holding, for each document, the result of its own
            ``_to_pb()`` when it has such an attribute, and the result of
            ``encode_value`` otherwise.
        """
        return [
            entry._to_pb() if hasattr(entry, "_to_pb") else encode_value(entry)
            for entry in self.documents
        ]


class Offset(Stage):
"""Skips a specified number of documents."""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -684,4 +684,23 @@ tests:
- args:
- fieldReferenceValue: awards
- stringValue: full_replace
name: replace_with
name: replace_with
- description: literals
pipeline:
- Literals:
- title: "The Hitchhiker's Guide to the Galaxy"
author: "Douglas Adams"
Comment thread
daniel-sanche marked this conversation as resolved.
assert_results:
- title: "The Hitchhiker's Guide to the Galaxy"
author: "Douglas Adams"
assert_proto:
pipeline:
stages:
- args:
- mapValue:
fields:
author:
stringValue: "Douglas Adams"
title:
stringValue: "The Hitchhiker's Guide to the Galaxy"
name: literals
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should also have tests here that cover the different input types we support

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch! I added an additional type to the test.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we still need more examples. Both of these are dict-like, so it seems like a pretty basic test. What if someone passes in Constant(1)? Or Constant("test").byte_length()? We say we support all expressions, how are non-dict types represented?

It would also be good to add some extra stages, to make sure this works like others

You can use gemini to create a few extra test scenarios. I usually don't include assert_proto on all of them, because it can be excessive

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Referring to the golang implementation, the only type accepted is a list of key-value pairs. So I don't think Constant(1) or Constant("test").byte_length() are in scope.

But I think it's a good idea to add more tests. I will also use mapValue instead of Constant.

Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ def test_pipeline_execute_stream_equivalence():
("aggregate", (Field.of("n").as_("alias"),), stages.Aggregate),
("distinct", ("field_name",), stages.Distinct),
("distinct", (Field.of("n"), "second"), stages.Distinct),
("literals", (Field.of("a"),), stages.Literals),
],
)
def test_pipeline_methods(method, args, result_cls):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,35 @@ def test_to_pb(self):
assert len(result.options) == 0


class TestLiterals:
    """Unit tests for the ``stages.Literals`` pipeline stage."""

    def _make_one(self, *args, **kwargs):
        """Construct the stage under test, forwarding every argument."""
        stage = stages.Literals(*args, **kwargs)
        return stage

    def test_ctor(self):
        """The constructor keeps the documents in order and names the stage."""
        first, second = Constant.of({"a": 1}), Constant.of({"b": 2})
        stage = self._make_one(first, second)
        assert stage.documents == (first, second)
        assert stage.name == "literals"

    def test_repr(self):
        """``repr`` shows the class name and the stored documents tuple."""
        only_doc = Constant.of({"a": 1})
        rendered = repr(self._make_one(only_doc))
        assert rendered == "Literals(documents=(Constant.of({'a': 1}),))"

    def test_to_pb(self):
        """Each document becomes one map-valued argument of the stage proto."""
        docs = (Constant.of({"a": 1}), Constant.of({"b": 2}))
        proto = self._make_one(*docs)._to_pb()
        assert proto.name == "literals"
        assert len(proto.args) == 2
        # Arguments must appear in the same order the documents were given.
        for position, (key, number) in enumerate((("a", 1), ("b", 2))):
            assert proto.args[position].map_value.fields[key].integer_value == number
        assert len(proto.options) == 0


class TestOffset:
def _make_one(self, *args, **kwargs):
return stages.Offset(*args, **kwargs)
Expand Down
Loading