Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
to_json,
to_json_string,
)
from bigframes.bigquery._operations.mathematical import rand
from bigframes.bigquery._operations.search import create_vector_index, vector_search
from bigframes.bigquery._operations.sql import sql_scalar
from bigframes.bigquery._operations.struct import struct
Expand Down Expand Up @@ -97,6 +98,8 @@
parse_json,
to_json,
to_json_string,
# mathematical ops
rand,
# search ops
create_vector_index,
vector_search,
Expand Down Expand Up @@ -148,6 +151,8 @@
"parse_json",
"to_json",
"to_json_string",
# mathematical ops
"rand",
# search ops
"create_vector_index",
"vector_search",
Expand Down
66 changes: 66 additions & 0 deletions bigframes/bigquery/_operations/mathematical.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from typing import Union

from bigframes import dataframe, dtypes
from bigframes import operations as ops
from bigframes import series


def rand(input_data: Union[series.Series, dataframe.DataFrame]) -> series.Series:
    """
    Produce a Series of pseudo-random FLOAT64 values drawn from [0, 1).

    The lower bound 0 is inclusive and the upper bound 1 is exclusive.

    .. warning::
        This method introduces non-determinism to the expression. Reading the
        same column twice may result in different results.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> df = bpd.DataFrame({"a": [1, 2, 3]})
        >>> df['random'] = bbq.rand(df)
        >>> # Resulting column 'random' will contain random floats between 0 and 1.

    Args:
        input_data (bigframes.pandas.Series or bigframes.pandas.DataFrame):
            The object the result is anchored to. It only supplies the row
            count and index of the output; its values are not passed to the
            ``RAND()`` expression. For a DataFrame, the first column is used
            as the anchor.

    Returns:
        bigframes.pandas.Series: A new Series of random float values.

    Raises:
        ValueError: If ``input_data`` is a DataFrame without any columns.
        TypeError: If ``input_data`` is neither a Series nor a DataFrame.
    """
    # Reject unsupported inputs up front so the rest of the body only has to
    # distinguish between the two accepted types.
    if not isinstance(input_data, (dataframe.DataFrame, series.Series)):
        raise TypeError(
            f"Unsupported type {type(input_data)}. "
            "Expected bigframes.pandas.Series or bigframes.pandas.DataFrame."
        )

    if isinstance(input_data, dataframe.DataFrame):
        column_count = len(input_data.columns)
        if column_count == 0:
            raise ValueError("Input DataFrame must have at least one column.")
        # Any column would do; the first one is picked as the anchor.
        row_source = input_data.iloc[:, 0]
    else:
        row_source = input_data

    # RAND() takes no arguments, hence the empty list of additional operands.
    rand_op = ops.SqlScalarOp(
        _output_type=dtypes.FLOAT_DTYPE,
        sql_template="RAND()",
    )
    return row_source._apply_nary_op(rand_op, [])
36 changes: 36 additions & 0 deletions tests/system/small/bigquery/test_mathematical.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import bigframes.bigquery as bbq


def test_rand(scalars_df_index):
    """Check that ``bbq.rand`` yields one value in [0, 1) per input row."""
    random_series = bbq.rand(scalars_df_index)

    # Materialize the result locally so the values can be inspected.
    materialized = random_series.to_pandas()

    # One random value per row of the input frame.
    assert len(materialized) == len(scalars_df_index)

    # Lower bound is inclusive, upper bound is exclusive.
    non_negative = materialized >= 0
    assert non_negative.all()
    below_one = materialized < 1
    assert below_one.all()

    # With more than one row, identical values everywhere would be an
    # extremely unlikely outcome for a random generator.
    if len(materialized) > 1:
        distinct_values = materialized.nunique()
        assert distinct_values > 1
56 changes: 56 additions & 0 deletions tests/unit/bigquery/test_mathematical.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest.mock as mock

import bigframes.bigquery as bbq
import bigframes.dataframe as dataframe
import bigframes.dtypes as dtypes
import bigframes.operations as ops
import bigframes.series as series


def test_rand_calls_apply_nary_op():
    """Check that ``bbq.rand`` on a Series applies a ``RAND()`` scalar op."""
    fake_series = mock.create_autospec(series.Series, instance=True)

    bbq.rand(fake_series)

    apply_mock = fake_series._apply_nary_op
    apply_mock.assert_called_once()

    # Only the positional arguments of the recorded call are of interest.
    positional, _ = apply_mock.call_args
    applied_op = positional[0]
    assert isinstance(applied_op, ops.SqlScalarOp)
    assert applied_op.sql_template == "RAND()"
    assert applied_op._output_type == dtypes.FLOAT_DTYPE
    # RAND() takes no operands besides the anchor series itself.
    assert positional[1] == []


def test_rand_with_dataframe():
    """Check that ``bbq.rand`` on a DataFrame anchors on its first column.

    The DataFrame and the Series returned by ``iloc`` are both autospec'd
    mocks, so no BigQuery session is required. The test verifies that the
    first column is selected via ``iloc[:, 0]`` and that the same ``RAND()``
    scalar op as in the Series case is applied to it with no extra operands.
    """
    mock_df = mock.create_autospec(dataframe.DataFrame, instance=True)
    # A non-empty column list lets the call get past the empty-frame check.
    mock_df.columns = ["col1"]

    mock_series = mock.create_autospec(series.Series, instance=True)
    # ``iloc`` is accessed as an attribute and then indexed, so it is replaced
    # with a property returning an indexer whose ``__getitem__`` hands back the
    # mocked series.
    mock_indexer = mock.MagicMock()
    mock_indexer.__getitem__.return_value = mock_series
    type(mock_df).iloc = mock.PropertyMock(return_value=mock_indexer)

    bbq.rand(mock_df)

    # The anchor must be the first column, i.e. ``iloc[:, 0]``.
    mock_indexer.__getitem__.assert_called_once_with((slice(None), 0))

    mock_series._apply_nary_op.assert_called_once()
    args, _ = mock_series._apply_nary_op.call_args
    op = args[0]
    assert isinstance(op, ops.SqlScalarOp)
    assert op.sql_template == "RAND()"
    # Same expectations as for the Series input: FLOAT64 output and no
    # additional operands.
    assert op._output_type == dtypes.FLOAT_DTYPE
    assert args[1] == []
Loading