Only run doctests on pandas 3, update doctest for pandas 3

rok · rok · commit ae226b40ce23 · 2026-02-04T23:40:41.000+01:00
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
@@ -69,10 +69,10 @@ jobs:
           - conda-python-3.12-no-numpy
         include:
           - name: conda-python-docs
-            cache: conda-python-3.10
+            cache: conda-python-3.11
             image: conda-python-docs
-            title: AMD64 Conda Python 3.10 Sphinx & Numpydoc
-            python: "3.10"
+            title: AMD64 Conda Python 3.11 Sphinx & Numpydoc
+            python: "3.11"
           - name: conda-python-3.11-nopandas
             cache: conda-python-3.11
             image: conda-python
diff --git a/docs/source/python/data.rst b/docs/source/python/data.rst
@@ -684,7 +684,7 @@ When using :class:`~.DictionaryArray` with pandas, the analogue is
    6    NaN
    7    baz
    dtype: category
-   Categories (3, object): ['foo', 'bar', 'baz']
+   Categories (3, str): ['foo', 'bar', 'baz']
 
 .. _data.record_batch:
 
diff --git a/docs/source/python/ipc.rst b/docs/source/python/ipc.rst
@@ -160,12 +160,12 @@ DataFrame output:
    >>> with pa.ipc.open_file(buf) as reader:
    ...     df = reader.read_pandas()
    >>> df[:5]
-      f0    f1     f2
-   0   1   foo   True
-   1   2   bar   None
-   2   3   baz  False
-   3   4  None   True
-   4   1   foo   True
+      f0   f1     f2
+   0   1  foo   True
+   1   2  bar   None
+   2   3  baz  False
+   3   4  NaN   True
+   4   1  foo   True
 
 Efficiently Writing and Reading Arrow Data
 ------------------------------------------
diff --git a/docs/source/python/pandas.rst b/docs/source/python/pandas.rst
@@ -170,7 +170,7 @@ number of possible values.
 
    >>> df = pd.DataFrame({"cat": pd.Categorical(["a", "b", "c", "a", "b", "c"])})
    >>> df.cat.dtype.categories
-   Index(['a', 'b', 'c'], dtype='object')
+   Index(['a', 'b', 'c'], dtype='str')
    >>> df
      cat
    0   a
@@ -182,7 +182,7 @@ number of possible values.
    >>> table = pa.Table.from_pandas(df)
    >>> table
    pyarrow.Table
-   cat: dictionary<values=string, indices=int8, ordered=0>
+   cat: dictionary<values=large_string, indices=int8, ordered=0>
    ----
    cat: [  -- dictionary:
    ["a","b","c"]  -- indices:
@@ -196,7 +196,7 @@ same categories of the Pandas DataFrame.
    >>> column = table[0]
    >>> chunk = column.chunk(0)
    >>> chunk.dictionary
-   <pyarrow.lib.StringArray object at ...>
+   <pyarrow.lib.LargeStringArray object at ...>
    [
      "a",
      "b",
@@ -224,7 +224,7 @@ use the ``datetime64[ns]`` type in Pandas and are converted to an Arrow
 
    >>> df = pd.DataFrame({"datetime": pd.date_range("2020-01-01T00:00:00Z", freq="h", periods=3)})
    >>> df.dtypes
-   datetime    datetime64[ns, UTC]
+   datetime    datetime64[us, UTC]
    dtype: object
    >>> df
                       datetime
@@ -234,9 +234,9 @@ use the ``datetime64[ns]`` type in Pandas and are converted to an Arrow
    >>> table = pa.Table.from_pandas(df)
    >>> table
    pyarrow.Table
-   datetime: timestamp[ns, tz=UTC]
+   datetime: timestamp[us, tz=UTC]
    ----
-   datetime: [[2020-01-01 00:00:00.000000000Z,...,2020-01-01 02:00:00.000000000Z]]
+   datetime: [[2020-01-01 00:00:00.000000Z,2020-01-01 01:00:00.000000Z,2020-01-01 02:00:00.000000Z]]
 
 In this example the Pandas Timestamp is time zone aware
 (``UTC`` in this case), and this information is used to create the Arrow
diff --git a/docs/source/python/parquet.rst b/docs/source/python/parquet.rst
@@ -238,9 +238,9 @@ concatenate them into a single table. You can read individual row groups with
    >>> parquet_file.read_row_group(0)
    pyarrow.Table
    one: double
-   two: string
+   two: large_string
    three: bool
-   __index_level_0__: string
+   __index_level_0__: large_string
    ----
    one: [[-1,null,2.5]]
    two: [["foo","bar","baz"]]
@@ -352,7 +352,7 @@ and improved performance for columns with many repeated string values.
    one: double
    two: dictionary<values=string, indices=int32, ordered=0>
    three: bool
-   __index_level_0__: string
+   __index_level_0__: large_string
    ----
    one: [[-1,null,2.5]]
    two: [  -- dictionary: