-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy patharray.py
More file actions
10052 lines (8608 loc) · 348 KB
/
array.py
File metadata and controls
10052 lines (8608 loc) · 348 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
Array class.
"""
# ? implement multi group in one axis getitem: lipro['P01,P02;P05'] <=> (lipro['P01,P02'], lipro['P05'])
# * we need an API to get to the "next" label. Sometimes, we want to use label+1, but that is problematic when labels
#   are not numeric, or do not have a step of 1.
# X.agegroup[X.agegroup.after(25):]
# X.agegroup[X.agegroup[25].next():]
# * implement keepaxes=True for _group_aggregate instead of/in addition to group tuples
# ? implement newaxis
# * Axis.sequence? geo.seq('A31', 'A38') (equivalent to geo['A31..A38'])
# ? re-implement row_totals/col_totals? or what do we do with them?
# * time specific API so that we know if we go for a subclass or not
# * data alignment in arithmetic methods
# * test structured arrays
# * use larray "utils" in LIAM2 (to avoid duplicated code)
from itertools import product, chain, groupby
from collections.abc import Iterable, Sequence
from pathlib import Path
import builtins
import functools
import warnings
from typing import Any, Union, Tuple, List
import numpy as np
import pandas as pd
try:
import xlwings as xw
except ImportError:
xw = None
try:
from numpy import nanprod as np_nanprod
except ImportError:
np_nanprod = None
from larray.core.abstractbases import ABCArray
from larray.core.constants import nan, inf
from larray.core.metadata import Metadata
from larray.core.expr import ExprNode, BinaryOp
from larray.core.group import (Group, IGroup, LGroup, _to_key, _to_keys,
_translate_sheet_name, _translate_group_key_hdf)
from larray.core.axis import Axis, AxisReference, AxisCollection, X, _make_axis # noqa: F401
from larray.core.axis import align_axis_collections
from larray.core.plot import PlotObject
from larray.util.misc import (table2str, size2str, ReprString,
float_error_handler_factory, light_product, common_dtype,
renamed_to, deprecate_kwarg, LHDFStore, lazy_attribute, unique_multi, SequenceZip,
Repeater, Product, ensure_no_numpy_type, exactly_one, concatenate_ndarrays)
from larray.util.options import _OPTIONS, DISPLAY_MAXLINES, DISPLAY_EDGEITEMS, DISPLAY_WIDTH, DISPLAY_PRECISION
from larray.util.types import Scalar
def all(values, axis=None) -> Union['Array', Scalar]:
    r"""
    Test whether all array elements along a given axis evaluate to True.

    See Also
    --------
    Array.all
    """
    # delegate to the Array method when we have an Array, otherwise fall back on the builtin
    if not isinstance(values, Array):
        return builtins.all(values)
    return values.all(axis)
def any(values, axis=None) -> Union['Array', Scalar]:
    r"""
    Test whether any array elements along a given axis evaluate to True.

    See Also
    --------
    Array.any
    """
    # delegate to the Array method when we have an Array, otherwise fall back on the builtin
    if not isinstance(values, Array):
        return builtins.any(values)
    return values.any(axis)
# commutative modulo float precision errors
def sum(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Sum of array elements.

    See Also
    --------
    Array.sum
    """
    # XXX: we might want to be more aggressive here (more types to convert), however, generators should still be
    # computed via the builtin.
    if isinstance(array, (np.ndarray, list)):
        array = Array(array)
    if not isinstance(array, Array):
        return builtins.sum(array, *args, **kwargs)
    return array.sum(*args, **kwargs)
def prod(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Product of array elements.

    See Also
    --------
    Array.prod
    """
    # unconditionally delegate to the argument's own prod implementation
    return array.prod(*args, **kwargs)
def cumsum(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Return the cumulative sum of array elements.

    See Also
    --------
    Array.cumsum
    """
    # unconditionally delegate to the argument's own cumsum implementation
    return array.cumsum(*args, **kwargs)
def cumprod(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Return the cumulative product of array elements.

    See Also
    --------
    Array.cumprod
    """
    # unconditionally delegate to the argument's own cumprod implementation
    return array.cumprod(*args, **kwargs)
def min(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Minimum of array elements.

    See Also
    --------
    Array.min
    """
    # delegate to the Array method when we have an Array, otherwise fall back on the builtin
    if not isinstance(array, Array):
        return builtins.min(array, *args, **kwargs)
    return array.min(*args, **kwargs)
def max(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Maximum of array elements.

    See Also
    --------
    Array.max
    """
    # delegate to the Array method when we have an Array, otherwise fall back on the builtin
    if not isinstance(array, Array):
        return builtins.max(array, *args, **kwargs)
    return array.max(*args, **kwargs)
def mean(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Compute the arithmetic mean.

    See Also
    --------
    Array.mean
    """
    # unconditionally delegate to the argument's own mean implementation
    return array.mean(*args, **kwargs)
def median(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Compute the median.

    See Also
    --------
    Array.median
    """
    # unconditionally delegate to the argument's own median implementation
    return array.median(*args, **kwargs)
def percentile(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Compute the qth percentile of the data along the specified axis.

    See Also
    --------
    Array.percentile
    """
    # unconditionally delegate to the argument's own percentile implementation
    return array.percentile(*args, **kwargs)
# not commutative
def ptp(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Return the range of values (maximum - minimum).

    See Also
    --------
    Array.ptp
    """
    # unconditionally delegate to the argument's own ptp implementation
    return array.ptp(*args, **kwargs)
def var(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Compute the variance.

    See Also
    --------
    Array.var
    """
    # unconditionally delegate to the argument's own var implementation
    return array.var(*args, **kwargs)
def std(array, *args, **kwargs) -> Union['Array', Scalar]:
    r"""
    Compute the standard deviation.

    See Also
    --------
    Array.std
    """
    # unconditionally delegate to the argument's own std implementation
    return array.std(*args, **kwargs)
def concat(arrays, axis=0, dtype=None) -> 'Array':
    r"""Concatenate arrays along axis.

    Parameters
    ----------
    arrays : tuple of Array
        Arrays to concatenate.
    axis : axis reference (int, str or Axis), optional
        Axis along which to concatenate. All arrays must have that axis. Defaults to the first axis.
    dtype : dtype, optional
        Result data type. Defaults to the "closest" type which can hold all arrays types without loss of information.

    Returns
    -------
    Array

    Examples
    --------
    >>> arr1 = ndtest((2, 3))
    >>> arr1
    a\b b0 b1 b2
    a0 0 1 2
    a1 3 4 5
    >>> arr2 = ndtest('a=a0,a1;b=b3')
    >>> arr2
    a\b b3
    a0 0
    a1 1
    >>> arr3 = ndtest('b=b4,b5')
    >>> arr3
    b b4 b5
    0 1
    >>> concat((arr1, arr2, arr3), 'b')
    a\b b0 b1 b2 b3 b4 b5
    a0 0 1 2 0 0 1
    a1 3 4 5 1 0 1
    """
    # Get axis by name, so that we do *NOT* check they are "compatible", because it makes sense to append axes of
    # different length
    name = arrays[0].axes[axis].name
    arrays_labels = [array.axes[axis].labels for array in arrays]
    # switch to object dtype if labels are of incompatible types, so that we do not implicitly convert numeric types to
    # strings (numpy should not do this in the first place but that is another story). This can happen for example when
    # we want to add a "total" tick to a numeric axis (eg age).
    combined_axis = Axis(concatenate_ndarrays(arrays_labels), name)
    # combine all axes (using labels from any side if any)
    result_axes = arrays[0].axes.replace(axis, combined_axis).union(*[array.axes - axis for array in arrays[1:]])
    if dtype is None:
        # "closest" common type able to represent all input dtypes
        dtype = common_dtype(arrays)
    result = empty(result_axes, dtype=dtype)
    # copy each input array into its consecutive slice of the combined axis;
    # assignment through labeled indexing also broadcasts/aligns the non-concatenated axes
    start = 0
    for labels, array in zip(arrays_labels, arrays):
        stop = start + len(labels)
        result[combined_axis.i[start:stop]] = array
        start = stop
    return result
class ArrayIterator:
    """Iterator over the first axis of a (>=2D) Array, yielding one subarray per label."""
    __slots__ = ('__next__',)

    def __init__(self, array):
        remaining_axes = array.axes[1:]
        # this case should not happen (handled by the fastpath in Array.__iter__)
        assert len(remaining_axes) > 0  # noqa: S101
        raw_next = iter(array.data).__next__

        def _next():
            # wrap each raw ndarray slice with the axes left after dropping the first one
            return Array(raw_next(), remaining_axes)

        self.__next__ = _next

    def __iter__(self):
        return self
# TODO: rename to ArrayIndexIndexer or something like that
# TODO: the first slice in the example below should be documented
class ArrayPositionalIndexer:
    r"""
    Allows selection of a subset using indices of labels.

    Notes
    -----
    Using .i[] is equivalent to numpy indexing when indexing along a single axis. However, when indexing along multiple
    axes this indexes the cross product instead of points.

    Examples
    --------
    >>> arr = ndtest((2, 3, 4))
    >>> arr
    a b\c c0 c1 c2 c3
    a0 b0 0 1 2 3
    a0 b1 4 5 6 7
    a0 b2 8 9 10 11
    a1 b0 12 13 14 15
    a1 b1 16 17 18 19
    a1 b2 20 21 22 23
    >>> arr.i[:, 0:2, [0, 2]]
    a b\c c0 c2
    a0 b0 0 2
    a0 b1 4 6
    a1 b0 12 14
    a1 b1 16 18
    """
    __slots__ = ('array',)

    def __init__(self, array):
        self.array = array

    def _is_full_scalar_key(self, key):
        """Return True if `key` addresses exactly one cell.

        That is, either a single integer on a 1D array or a tuple with one integer
        per dimension of the array. Such keys can go straight to the numpy data.
        """
        ndim = self.array.ndim
        if isinstance(key, (int, np.integer)):
            return ndim == 1
        return (isinstance(key, tuple) and len(key) == ndim
                and all(isinstance(axis_key, (int, np.integer)) for axis_key in key))

    def __getitem__(self, key):
        # fast path when the result is a scalar
        if self._is_full_scalar_key(key):
            return self.array.data[key]
        else:
            return self.array.__getitem__(key, translate_key=False)

    def __setitem__(self, key, value):
        # fast path when setting a single cell
        if self._is_full_scalar_key(key):
            self.array.data[key] = value
        else:
            self.array.__setitem__(key, value, translate_key=False)

    def __len__(self):
        return len(self.array)

    def __iter__(self):
        array = self.array
        # fast path for 1D arrays (where we return scalars)
        if array.ndim <= 1:
            return iter(array.data)
        else:
            return ArrayIterator(array)
class ArrayPointsIndexer:
    r"""
    Allows selection of arbitrary items in the array based on their N-dimensional label index.

    Examples
    --------
    >>> arr = ndtest((2, 3, 4))
    >>> arr
    a b\c c0 c1 c2 c3
    a0 b0 0 1 2 3
    a0 b1 4 5 6 7
    a0 b2 8 9 10 11
    a1 b0 12 13 14 15
    a1 b1 16 17 18 19
    a1 b2 20 21 22 23

    To select the two points with label coordinates
    [a0, b0, c0] and [a1, b2, c2], you must do:

    >>> arr.points[['a0', 'a1'], ['b0', 'b2'], ['c0', 'c2']]
    a_b_c a0_b0_c0 a1_b2_c2
    0 22
    >>> arr.points['a0,a1', 'b0,b2', 'c0,c2']
    a_b_c a0_b0_c0 a1_b2_c2
    0 22

    The number of label(s) on each dimension must be equal:

    >>> arr.points['a0,a1', 'b0,b2', 'c0,c1,c2'] # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
    ...
    ValueError: all combined keys should have the same length
    """
    __slots__ = ('array',)

    def __init__(self, array):
        self.array = array

    def __getitem__(self, key):
        # forward to Array.__getitem__ in "points" mode (select coordinates, not a cross product)
        target = self.array
        return target.__getitem__(key, points=True)

    def __setitem__(self, key, value):
        # forward to Array.__setitem__ in "points" mode
        target = self.array
        target.__setitem__(key, value, points=True)
# TODO: add support for slices
# To select the first 4 values across all axes:
#
# >>> arr.iflat[:4]
# a_b a0_b0 a0_b1 a0_b2 a1_b0
# 0 10 20 30
class ArrayFlatIndicesIndexer:
    r"""
    Access the array by index as if it was flat (one dimensional) and all its axes were combined.

    Notes
    -----
    In general arr.iflat[key] should be equivalent to (but much faster than) arr.combine_axes().i[key]

    Examples
    --------
    >>> arr = ndtest((2, 3)) * 10
    >>> arr
    a\b b0 b1 b2
    a0 0 10 20
    a1 30 40 50

    To select the first, second, fourth and fifth values across all axes:

    >>> arr.combine_axes().i[[0, 1, 3, 4]]
    a_b a0_b0 a0_b1 a1_b0 a1_b1
    0 10 30 40
    >>> arr.iflat[[0, 1, 3, 4]]
    a_b a0_b0 a0_b1 a1_b0 a1_b1
    0 10 30 40

    Set the first and sixth values to 42

    >>> arr.iflat[[0, 5]] = 42
    >>> arr
    a\b b0 b1 b2
    a0 42 10 20
    a1 30 40 42

    When the key is an Array, the result will have the axes of the key

    >>> key = Array([0, 3], 'c=c0,c1')
    >>> key
    c c0 c1
    0 3
    >>> arr.iflat[key]
    c c0 c1
    42 30
    """
    __slots__ = ('array',)

    def __init__(self, array):
        self.array = array

    def __getitem__(self, flat_key, sep='_'):
        if isinstance(flat_key, ABCArray):
            # an Array key provides both the indices and the axes of the result
            np_key = flat_key.data
            res_axes = flat_key.axes
        else:
            np_key = np.asarray(flat_key)
            # result axes are the combination of all input axes
            res_axes = self.array.axes._combined_iflat(np_key, sep=sep)
        return Array(self.array.data.flat[np_key], res_axes)

    def __setitem__(self, flat_key, value):
        # np.ndarray.flat is a flatiter object but it is indexable despite the name
        self.array.data.flat[flat_key] = value

    def __len__(self):
        return self.array.size
# TODO: rename to ArrayIndexPointsIndexer or something like that
# TODO: show that we need to use a "full slice" for leaving the dimension alone
# TODO: document explicitly that axes should be in the correct order and missing axes should be slice None
# (except at the end)
class ArrayPositionalPointsIndexer:
    r"""
    Allows selection of arbitrary items in the array based on their N-dimensional index.

    Examples
    --------
    >>> arr = ndtest((2, 3, 4))
    >>> arr
    a b\c c0 c1 c2 c3
    a0 b0 0 1 2 3
    a0 b1 4 5 6 7
    a0 b2 8 9 10 11
    a1 b0 12 13 14 15
    a1 b1 16 17 18 19
    a1 b2 20 21 22 23

    To select the two points with index coordinates
    [0, 0, 0] and [1, 2, 2], you must do:

    >>> arr.ipoints[[0, 1], [0, 2], [0, 2]]
    a_b_c a0_b0_c0 a1_b2_c2
    0 22

    The number of index(es) on each dimension must be equal:

    >>> arr.ipoints[[0, 1], [0, 2], [0, 1, 2]] # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
    ...
    ValueError: all combined keys should have the same length
    >>> arr.ipoints[[0, 1], [0, 2]]
    a_b\c c0 c1 c2 c3
    a0_b0 0 1 2 3
    a1_b2 20 21 22 23
    """
    __slots__ = ('array',)

    def __init__(self, array):
        self.array = array

    def __getitem__(self, key):
        # forward in "points" mode with the key taken as raw indices (no label translation)
        target = self.array
        return target.__getitem__(key, translate_key=False, points=True)

    def __setitem__(self, key, value):
        # forward in "points" mode with the key taken as raw indices (no label translation)
        target = self.array
        target.__setitem__(key, value, translate_key=False, points=True)
def get_axis(obj, i):
    r"""
    Return an axis according to its position.

    Parameters
    ----------
    obj : Array or other array
        Input Array or any array object which has a shape attribute (NumPy or Pandas array).
    i : int
        index of the axis.

    Returns
    -------
    Axis
        Axis corresponding to the given index if input `obj` is an Array. A new anonymous Axis with the length of
        the ith dimension of the input `obj` otherwise.

    Examples
    --------
    >>> arr = ndtest((2, 2, 2))
    >>> arr
    a b\c c0 c1
    a0 b0 0 1
    a0 b1 2 3
    a1 b0 4 5
    a1 b1 6 7
    >>> get_axis(arr, 1)
    Axis(['b0', 'b1'], 'b')
    >>> np_arr = np.zeros((2, 2, 2))
    >>> get_axis(np_arr, 1)
    Axis(2, None)
    """
    if isinstance(obj, Array):
        return obj.axes[i]
    # non-Array objects only contribute a length, so build an anonymous axis from their shape
    return Axis(obj.shape[i])
# Documentation snippet for the extra positional argument ('q') accepted by percentile-style
# aggregation methods; spliced into their docstrings by _doc_agg_method below.
# NOTE(review): leading whitespace inside these doc strings appears stripped in this copy of the
# file — confirm the indentation against the upstream source before relying on rendered layout.
_arg_agg = {
    'q': """
q : int in range of [0,100] (or sequence of floats)
Percentile to compute, which must be between 0 and 100 inclusive."""
}
# Keyword arguments shared by aggregation methods: each entry maps the argument name to its
# default value and its documentation snippet (spliced into docstrings by _doc_agg_method below).
# NOTE(review): leading whitespace inside these doc strings appears stripped in this copy of the
# file — confirm the indentation against the upstream source before relying on rendered layout.
_kwarg_agg = {
    'dtype': {'value': None, 'doc': """
dtype : dtype, optional
The data type of the returned array. Defaults to None (the dtype of the input array)."""},
    'out': {'value': None, 'doc': """
out : Array, optional
Alternate output array in which to place the result. It must have the same shape as the expected output and
its type is preserved (e.g., if dtype(out) is float, the result will consist of 0.0's and 1.0's).
Axes and labels can be different, only the shape matters. Defaults to None (create a new array)."""},
    'ddof': {'value': 1, 'doc': """
ddof : int, optional
"Delta Degrees of Freedom": the divisor used in the calculation is ``N - ddof``, where ``N`` represents
the number of elements. Defaults to 1."""},
    'skipna': {'value': None, 'doc': """
skipna : bool, optional
Whether to skip NaN (null) values. If False, resulting cells will be NaN if any of the aggregated
cells is NaN. Defaults to True."""},
    'keepaxes': {'value': False, 'doc': """
keepaxes : bool or label-like, optional
Whether reduced axes are left in the result as dimensions with size one.
If True, reduced axes will contain a unique label representing the applied aggregation
(e.g. 'sum', 'prod', ...). It is possible to override this label by passing a specific value
(e.g. keepaxes='summation'). Defaults to False."""},
    'method': {'value': 'linear', 'doc': """
method : str, optional
This parameter specifies the method to use for estimating the
percentile when the desired percentile lies between two indexes.
The different methods supported are described in the Notes section. The options are:
* 'inverted_cdf'
* 'averaged_inverted_cdf'
* 'closest_observation'
* 'interpolated_inverted_cdf'
* 'hazen'
* 'weibull'
* 'linear' (default)
* 'median_unbiased'
* 'normal_unbiased'
* 'lower'
* 'higher'
* 'midpoint'
* 'nearest'
The first three and last four methods are discontinuous. Defaults to 'linear'."""}
}
# Shared "Notes" section text injected into the docstrings of percentile-style methods by
# _doc_agg_method (via the {percentile_notes} placeholder). The method descriptions mirror
# the numpy.percentile documentation.
# NOTE(review): leading whitespace inside this string appears stripped in this copy of the
# file — confirm the indentation against the upstream source before relying on rendered layout.
PERCENTILE_NOTES = """Notes
-----
Given a vector ``V`` of length ``n``, the q-th percentile of ``V`` is
the value ``q/100`` of the way from the minimum to the maximum in a
sorted copy of ``V``. The values and distances of the two nearest
neighbors as well as the `method` parameter will determine the
percentile if the normalized ranking does not match the location of
``q`` exactly. This function is the same as the median if ``q=50``, the
same as the minimum if ``q=0`` and the same as the maximum if
``q=100``.
The optional `method` parameter specifies the method to use when the
desired percentile lies between two indexes ``i`` and ``j = i + 1``.
In that case, we first determine ``i + g``, a virtual index that lies
between ``i`` and ``j``, where ``i`` is the floor and ``g`` is the
fractional part of the index. The final result is, then, an interpolation
of ``a[i]`` and ``a[j]`` based on ``g``. During the computation of ``g``,
``i`` and ``j`` are modified using correction constants ``alpha`` and
``beta`` whose choices depend on the ``method`` used. Finally, note that
since Python uses 0-based indexing, the code subtracts another 1 from the
index internally.
The following formula determines the virtual index ``i + g``, the location
of the percentile in the sorted sample:
.. math::
i + g = (q / 100) * ( n - alpha - beta + 1 ) + alpha
The different methods then work as follows
inverted_cdf:
method 1 of H&F [1]_.
This method gives discontinuous results:
* if g > 0 ; then take j
* if g = 0 ; then take i
averaged_inverted_cdf:
method 2 of H&F [1]_.
This method give discontinuous results:
* if g > 0 ; then take j
* if g = 0 ; then average between bounds
closest_observation:
method 3 of H&F [1]_.
This method give discontinuous results:
* if g > 0 ; then take j
* if g = 0 and index is odd ; then take j
* if g = 0 and index is even ; then take i
interpolated_inverted_cdf:
method 4 of H&F [1]_.
This method give continuous results using:
* alpha = 0
* beta = 1
hazen:
method 5 of H&F [1]_.
This method give continuous results using:
* alpha = 1/2
* beta = 1/2
weibull:
method 6 of H&F [1]_.
This method give continuous results using:
* alpha = 0
* beta = 0
linear:
method 7 of H&F [1]_.
This method give continuous results using:
* alpha = 1
* beta = 1
median_unbiased:
method 8 of H&F [1]_.
This method is probably the best method if the sample
distribution function is unknown (see reference).
This method give continuous results using:
* alpha = 1/3
* beta = 1/3
normal_unbiased:
method 9 of H&F [1]_.
This method is probably the best method if the sample
distribution function is known to be normal.
This method give continuous results using:
* alpha = 3/8
* beta = 3/8
lower:
NumPy method kept for backwards compatibility.
Takes ``i`` as the interpolation point.
higher:
NumPy method kept for backwards compatibility.
Takes ``j`` as the interpolation point.
nearest:
NumPy method kept for backwards compatibility.
Takes ``i`` or ``j``, whichever is nearest.
midpoint:
NumPy method kept for backwards compatibility.
Uses ``(i + j) / 2``."""
def _doc_agg_method(func, by=False, long_name='', action_verb='perform', extra_args=(), kwargs=()):
    """Fill in the docstring template of an aggregation method, in place.

    Substitutes the ``{signature}``, ``{parameters}`` and ``{percentile_notes}``
    placeholders in ``func.__doc__`` using the shared snippets defined above
    (_arg_agg, _kwarg_agg, PERCENTILE_NOTES).

    Parameters
    ----------
    func : function
        The aggregation method whose docstring is filled in (modified in place).
    by : bool, optional
        Whether this is a "by" variant (aggregates along all axes *except* those given).
    long_name : str, optional
        Human-readable name of the aggregation. Defaults to ``func.__name__``.
    action_verb : str, optional
        Verb used in the generated prose (e.g. 'compute'). Defaults to 'perform'.
    extra_args : sequence of str, optional
        Names of extra positional arguments documented in _arg_agg (e.g. 'q').
    kwargs : sequence of str, optional
        Names of keyword arguments documented in _kwarg_agg.
    """
    if not long_name:
        long_name = func.__name__
    # build the comma-separated argument fragments of the signature; each fragment keeps a
    # trailing ', ' so it can be spliced directly before '*axes_and_groups'/'**explicit_axes'
    _args = ','.join(extra_args) + ', ' if len(extra_args) > 0 else ''
    _kwargs = ', '.join([f"{k}={_kwarg_agg[k]['value']!r}" for k in kwargs]) + ', ' if len(kwargs) > 0 else ''
    signature = f'{func.__name__}({_args}*axes_and_groups, {_kwargs}**explicit_axes)'
    if by:
        specific_template = """The {long_name} is {action_verb}ed along all axes except the given one(s).
For groups, {long_name} is {action_verb}ed along groups and non associated axes."""
    else:
        specific_template = "Axis(es) or group(s) along which the {long_name} is {action_verb}ed."
    doc_specific = specific_template.format(long_name=long_name, action_verb=action_verb)
    # per-argument documentation snippets, concatenated in declaration order
    doc_args = "".join(_arg_agg[arg] for arg in extra_args)
    doc_kwargs = "".join(_kwarg_agg[kw]['doc'] for kw in kwargs)
    doc_varargs = fr"""
\*axes_and_groups : None or int or str or Axis or Group or any combination of those
{doc_specific}
The default (no axis or group) is to {action_verb} the {long_name} over all the dimensions of the input
array.
An axis can be referred by:
* its index (integer). Index can be a negative integer, in which case it counts from the last to the
first axis.
* its name (str or AxisReference). You can use either a simple string ('axis_name') or the special
variable X (X.axis_name).
* a variable (Axis). If the axis has been defined previously and assigned to a variable, you can pass it as
argument.
You may not want to {action_verb} the {long_name} over a whole axis but over a selection of specific
labels. To do so, you have several possibilities:
* (['a1', 'a3', 'a5'], 'b1, b3, b5') : labels separated by commas in a list or a string
* ('a1:a5:2') : select labels using a slice (general syntax is 'start:end:step' where is 'step' is
optional and 1 by default).
* (a='a1, a2, a3', X.b['b1, b2, b3']) : in case of possible ambiguity, i.e. if labels can belong to more
than one axis, you must precise the axis.
* ('a1:a3; a5:a7', b='b0,b2; b1,b3') : create several groups with semicolons.
Names are simply given by the concatenation of labels (here: 'a1,a2,a3', 'a5,a6,a7', 'b0,b2' and 'b1,b3')
* ('a1:a3 >> a123', 'b[b0,b2] >> b12') : operator ' >> ' allows to rename groups."""
    parameters = f"""Parameters
----------{doc_args}{doc_varargs}{doc_kwargs}"""
    func.__doc__ = func.__doc__.format(signature=signature, parameters=parameters, percentile_notes=PERCENTILE_NOTES)
# NumPy reduction functions whose result is floating point even for integer input;
# presumably consulted when choosing the result dtype of aggregations — usage is outside this chunk.
_always_return_float = {np.mean, np.nanmean, np.median, np.nanmedian, np.percentile, np.nanpercentile,
                        np.std, np.nanstd, np.var, np.nanvar}
def element_equal(a1, a2, rtol=0, atol=0, nan_equals=False):
    """Deprecated element-wise comparison; use :py:obj:`Array.eq` instead."""
    warnings.warn("element_equal() is deprecated. Use array1.eq(array2, rtol, atol, nan_equals) instead.",
                  FutureWarning, stacklevel=2)
    # coerce the first operand so plain sequences/scalars work too, then delegate
    return asarray(a1).eq(a2, rtol, atol, nan_equals)
def nan_equal(a1, a2):
    """Deprecated NaN-aware comparison; use :py:obj:`Array.eq` with nans_equal=True instead."""
    deprecation_msg = "nan_equal() is deprecated. Use array1.eq(array2, nans_equal=True) instead."
    warnings.warn(deprecation_msg, FutureWarning, stacklevel=2)
    return a1.eq(a2, nans_equal=True)
def _handle_meta(meta, title):
"""
Make sure meta is either None or a Metadata instance.
"""
if title is not None:
if meta is None:
meta = Metadata()
warnings.warn("title argument is deprecated. Please use meta argument instead", FutureWarning, stacklevel=2)
meta['title'] = title
if meta is None or isinstance(meta, Metadata):
return meta
# XXX: move this test in Metadata.__init__?
if not isinstance(meta, (list, dict)):
raise TypeError(f"Expected None, list of pairs, dict or Metadata object "
f"instead of {type(meta).__name__}")
return Metadata(meta)
# This prevents a warning in Pandas 1.4 <= version < 2.0 for arrays with object
# dtype which contain only numeric values. We force Pandas 2.0 behavior
# (ie use object dtype instead of inferring). See issue #1061.
def np_array_to_pd_index(array, name=None, tupleize_cols=True):
    """Wrap a numpy array in a pd.Index, keeping object dtype as-is (no inference)."""
    forced_dtype = object if array.dtype.kind == 'O' else None
    return pd.Index(array, dtype=forced_dtype, name=name, tupleize_cols=tupleize_cols)
def align_arrays(values, join='outer', fill_value=nan, axes=None):
    """Reindex all Array values onto their aligned (combined) axes.

    Scalars are passed through unchanged; any other type raises TypeError.
    Arrays with anonymous axes are rejected because reindex does not support them.
    """
    invalid = [value for value in values
               if not isinstance(value, Array) and not np.isscalar(value)]
    if invalid:
        invalid_types = set(type(value) for value in invalid)
        type_names = sorted(t.__name__ for t in invalid_types)
        raise TypeError("align only supports Arrays and scalars but got:"
                        f"{', '.join(type_names)}")
    # scalars contribute no axes to the alignment
    axis_collections = [value.axes if isinstance(value, Array) else AxisCollection()
                        for value in values]
    # fail early because reindex does not currently support anonymous axes
    if any(name is None
           for axis_col in axis_collections
           for name in axis_col.names):
        raise ValueError("arrays with anonymous axes are currently not "
                         "supported by Array.align")
    try:
        aligned_collections = align_axis_collections(axis_collections, join=join, axes=axes)
    except ValueError as e:
        raise ValueError(f"Arrays are not aligned because {e}")
    return tuple(value.reindex(aligned_axes, fill_value=fill_value)
                 if isinstance(value, Array)
                 else value
                 for value, aligned_axes in zip(values, aligned_collections))
class Array(ABCArray):
r"""
An Array object represents a multidimensional, homogeneous array of fixed-size items with labeled axes.
The function :func:`asarray` can be used to convert a NumPy array or Pandas DataFrame into an Array.
Parameters
----------
data : scalar, tuple, list or NumPy ndarray
Input data.
axes : collection (tuple, list or AxisCollection) of axes (int, str or Axis), optional
Axes.
title : str, optional
Deprecated. See 'meta' below.
meta : list of pairs or dict or Metadata, optional
Metadata (title, description, author, creation_date, ...) associated with the array.
Keys must be strings. Values must be of type string, int, float, date, time or datetime.
dtype : type, optional
Datatype for the array. Defaults to None (inferred from the data).
Attributes
----------
data : NumPy ndarray
Data.
axes : AxisCollection
Axes.
meta : Metadata
Metadata (title, description, author, creation_date, ...) associated with the array.
See Also
--------
sequence : Create an Array by sequentially applying modifications to the array along axis.
ndtest : Create a test Array with increasing elements.
zeros : Create an Array, each element of which is zero.
ones : Create an Array, each element of which is 1.
full : Create an Array filled with a given value.
empty : Create an Array, but leave its allocated memory unchanged (i.e., it contains “garbage”).
Warnings
--------
Metadata is not kept when actions or methods are applied on an array
except for operations modifying the object in-place, such as: `pop[age < 10] = 0`.
Do not add metadata to an array if you know you will apply actions or methods
on it before dumping it.
Examples
--------
>>> age = Axis([10, 11, 12], 'age')
>>> sex = Axis('sex=M,F')
>>> time = Axis([2007, 2008, 2009], 'time')
>>> axes = [age, sex, time]
>>> data = np.zeros((len(axes), len(sex), len(time)))
>>> Array(data, axes)
age sex\time 2007 2008 2009
10 M 0.0 0.0 0.0
10 F 0.0 0.0 0.0
11 M 0.0 0.0 0.0
11 F 0.0 0.0 0.0
12 M 0.0 0.0 0.0
12 F 0.0 0.0 0.0
>>> # with metadata
>>> arr = Array(data, axes, meta=Metadata(title='my title', author='John Smith'))
Array creation functions
>>> full(axes, 10.0)
age sex\time 2007 2008 2009
10 M 10.0 10.0 10.0
10 F 10.0 10.0 10.0
11 M 10.0 10.0 10.0
11 F 10.0 10.0 10.0
12 M 10.0 10.0 10.0
12 F 10.0 10.0 10.0
>>> arr = empty(axes)
>>> arr['F'] = 1.0
>>> arr['M'] = -1.0
>>> arr
age sex\time 2007 2008 2009
10 M -1.0 -1.0 -1.0
10 F 1.0 1.0 1.0
11 M -1.0 -1.0 -1.0
11 F 1.0 1.0 1.0
12 M -1.0 -1.0 -1.0
12 F 1.0 1.0 1.0
>>> bysex = sequence(sex, initial=-1, inc=2)
>>> bysex
sex M F
-1 1
>>> sequence(age, initial=10, inc=bysex)
sex\age 10 11 12
M 10 9 8
F 10 11 12
"""
__slots__ = ('data', 'axes', '_meta')
def __init__(self, data, axes=None, title=None, meta=None, dtype=None):
data = np.asarray(data, dtype=dtype)
ndim = data.ndim
if axes is None:
axes = AxisCollection(data.shape)
else:
if not isinstance(axes, AxisCollection):
axes = AxisCollection(axes)
if axes.ndim != ndim:
raise ValueError(f"number of axes ({axes.ndim}) does not match "
f"number of dimensions of data ({ndim})")
if axes.shape != data.shape:
raise ValueError(f"length of axes {axes.shape} does not match "
f"data shape {data.shape}")
self.data = data
self.axes = axes
if meta is not None or title is not None: