This repository was archived by the owner on Apr 1, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 69
Expand file tree
/
Copy pathtest_progress_bar.py
More file actions
174 lines (130 loc) · 5.93 KB
/
test_progress_bar.py
File metadata and controls
174 lines (130 loc) · 5.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import tempfile
import numpy as np
import pandas as pd
import pytest
import bigframes as bf
import bigframes.formatting_helpers as formatting_helpers
from bigframes.session import MAX_INLINE_DF_BYTES
# Minimal pattern that every progress-bar line for a query is expected to match.
job_load_message_regex = r"Query"
# Leading text of the repr emitted in "deferred" display mode; tests match by
# substring, so only the stable prefix is pinned here.
EXPECTED_DRY_RUN_MESSAGE = "Computation deferred. Computation will process"
def test_progress_bar_dataframe(
    penguins_df_default_index: bf.dataframe.DataFrame, capsys
):
    """Materializing a DataFrame with the terminal progress bar prints a query message."""
    capsys.readouterr()  # drop anything captured so far
    with bf.option_context("display.progress_bar", "terminal"):
        penguins_df_default_index.to_pandas(allow_large_results=True)
    captured = capsys.readouterr()
    assert_loading_msg_exist(captured.out)
    assert penguins_df_default_index.query_job is not None
def test_progress_bar_series(penguins_df_default_index: bf.dataframe.DataFrame, capsys):
    """Materializing a Series with the terminal progress bar prints a query message."""
    series = penguins_df_default_index["body_mass_g"].head(10)
    capsys.readouterr()  # drop anything captured so far
    with bf.option_context("display.progress_bar", "terminal"):
        series.to_pandas(allow_large_results=True)
    captured = capsys.readouterr()
    assert_loading_msg_exist(captured.out)
    assert series.query_job is not None
def test_progress_bar_scalar(penguins_df_default_index: bf.dataframe.DataFrame, capsys):
    """Computing a scalar aggregate with the terminal progress bar prints a query message."""
    capsys.readouterr()  # drop anything captured so far
    with bf.option_context("display.progress_bar", "terminal"):
        penguins_df_default_index["body_mass_g"].head(10).mean()
    captured = capsys.readouterr()
    assert_loading_msg_exist(captured.out)
def test_progress_bar_scalar_allow_large_results(
    penguins_df_default_index: bf.dataframe.DataFrame, capsys
):
    """Scalar aggregation also reports progress when large results are allowed."""
    capsys.readouterr()  # drop anything captured so far
    option_pairs = (
        "display.progress_bar",
        "terminal",
        "compute.allow_large_results",
        "True",
    )
    with bf.option_context(*option_pairs):
        penguins_df_default_index["body_mass_g"].head(10).mean()
    captured = capsys.readouterr()
    assert_loading_msg_exist(captured.out)
def test_progress_bar_extract_jobs(
    penguins_df_default_index: bf.dataframe.DataFrame, gcs_folder, capsys
):
    """Exporting to GCS with the terminal progress bar prints a query message."""
    destination = gcs_folder + "test_read_csv_progress_bar*.csv"
    capsys.readouterr()  # drop anything captured so far
    with bf.option_context("display.progress_bar", "terminal"):
        penguins_df_default_index.to_csv(destination)
    captured = capsys.readouterr()
    assert_loading_msg_exist(captured.out)
def test_progress_bar_load_jobs(
    session: bf.Session, penguins_pandas_df_default_index: pd.DataFrame, capsys
):
    """read_csv through the bigquery_load engine should emit a "Load" progress message.

    Args:
        session: bigframes session fixture used to read the CSV back.
        penguins_pandas_df_default_index: local pandas fixture to inflate and export.
        capsys: pytest capture fixture for asserting on terminal output.
    """
    # Repeat the DF to be big enough to trigger the load job.
    # NOTE(review): the loop compares the *row count* against a byte threshold
    # (MAX_INLINE_DF_BYTES), so it over-inflates; harmless for this test, but
    # confirm whether a byte-size comparison was intended.
    df = penguins_pandas_df_default_index
    while len(df) < MAX_INLINE_DF_BYTES:
        df = pd.DataFrame(np.repeat(df.values, 2, axis=0))
    # The default write engine is usually streaming, which doesn't create a job,
    # so force the bigquery_load engine to get a visible load job.
    with bf.option_context(
        "display.progress_bar",
        "terminal",
        "compute.default_write_engine",
        "bigquery_load",
    ), tempfile.TemporaryDirectory() as tmp_dir:  # renamed from `dir`: it shadowed the builtin
        path = tmp_dir + "/test_read_csv_progress_bar*.csv"
        df.to_csv(path, index=False)
        capsys.readouterr()  # clear output
        session.read_csv(path)
    assert_loading_msg_exist(capsys.readouterr().out, pattern="Load")
def assert_loading_msg_exist(capstdout: str, pattern=job_load_message_regex):
    """Assert that captured stdout is non-empty and at least one line matches *pattern*.

    Args:
        capstdout: the captured stdout text (newline-separated progress lines).
        pattern: regex searched against each non-empty line; defaults to the
            module-level ``job_load_message_regex``.

    Raises:
        AssertionError: if there is no output at all, or no line matches.
    """
    # Ignore blank lines; progress bars often emit trailing newlines.
    lines = [line for line in capstdout.split("\n") if line]
    assert lines, "expected progress-bar output, but captured stdout was empty"
    # `any` replaces the original manual match-counting loop: the test only
    # cared that at least one line matched.
    assert any(
        re.search(pattern, line) is not None for line in lines
    ), f"no output line matched pattern {pattern!r}"
def test_query_job_repr(penguins_df_default_index: bf.dataframe.DataFrame):
    """repr of a finished query job should include the standard job-summary fields."""
    penguins_df_default_index.to_pandas(allow_large_results=True)
    rendered = formatting_helpers.repr_query_job(
        penguins_df_default_index.query_job
    )
    for expected in (
        "Job",
        "Destination Table",
        "Slot Time",
        "Bytes Processed",
        "Cache hit",
    ):
        assert expected in rendered
def test_query_job_dry_run_dataframe(penguins_df_default_index: bf.dataframe.DataFrame):
    """In deferred repr mode, a DataFrame repr shows the dry-run message."""
    with bf.option_context("display.repr_mode", "deferred"):
        rendered = repr(penguins_df_default_index)
    assert EXPECTED_DRY_RUN_MESSAGE in rendered
def test_query_job_dry_run_index(penguins_df_default_index: bf.dataframe.DataFrame):
    """In deferred repr mode, an Index repr shows the dry-run message."""
    with bf.option_context("display.repr_mode", "deferred"):
        rendered = repr(penguins_df_default_index.index)
    assert EXPECTED_DRY_RUN_MESSAGE in rendered
def test_query_job_dry_run_series(penguins_df_default_index: bf.dataframe.DataFrame):
    """In deferred repr mode, a Series repr shows the dry-run message."""
    with bf.option_context("display.repr_mode", "deferred"):
        rendered = repr(penguins_df_default_index["body_mass_g"])
    assert EXPECTED_DRY_RUN_MESSAGE in rendered
def test_repr_anywidget_dataframe(penguins_df_default_index: bf.dataframe.DataFrame):
    """anywidget repr mode still yields a text summary with columns and shape."""
    pytest.importorskip("anywidget")
    with bf.option_context("display.repr_mode", "anywidget"):
        rendered = repr(penguins_df_default_index)
    for fragment in ("species", "island", "[344 rows x 7 columns]"):
        assert fragment in rendered
def test_repr_anywidget_index(penguins_df_default_index: bf.dataframe.DataFrame):
    """anywidget repr mode on an Index still produces a useful text summary."""
    pytest.importorskip("anywidget")
    with bf.option_context("display.repr_mode", "anywidget"):
        rendered = repr(penguins_df_default_index.index)
    # In non-interactive environments, should still get a useful summary.
    for fragment in ("Index", "0, 1, 2, 3, 4", "dtype='Int64'"):
        assert fragment in rendered