12.3.10.4.7. Operations

import pandas as pd
import numpy as np


# Row labels: six consecutive calendar days beginning 2022-05-01.
dates = pd.date_range(start="20220501", periods=6)
# 6x4 table of standard-normal draws, indexed by those days, with columns A-D.
# The draws are unseeded, so the numbers differ on every run.
dataFrame = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=list("ABCD"),
)

Statistics

# Column-wise mean: one value per column (A-D), averaged over all rows.
dataFrame.mean(axis=0)
A    0.510904
B    0.309009
C   -0.114758
D   -0.246521
dtype: float64

Mean along axis 1 (averaging across the columns, which gives one value per row):

# Row-wise mean: average the four columns for each date.
dataFrame.mean(axis=1)
2022-05-01    0.090335
2022-05-02   -0.014631
2022-05-03   -0.350706
2022-05-04   -0.214797
2022-05-05    0.602078
2022-05-06    0.575671
Freq: D, dtype: float64
# Build a Series on the same dates, then shift it two rows down: the first two
# entries become NaN and the last two original values fall off the end.
series = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates)
series = series.shift(periods=2)
# Subtract the Series from every column, matching on the row index. Rows where
# the Series is NaN come out as NaN in every column.
dataFrame.sub(series, axis=0)
                   A         B         C         D
2022-05-01       NaN       NaN       NaN       NaN
2022-05-02       NaN       NaN       NaN       NaN
2022-05-03 -0.284007 -0.906460 -2.257962 -1.954394
2022-05-04 -1.867116 -3.854271 -3.230730 -3.907071
2022-05-05 -4.482478 -3.683779 -4.677417 -4.748012
2022-05-06       NaN       NaN       NaN       NaN


Apply

# Apply np.cumsum to each column in turn, giving a running total down the rows.
dataFrame.apply(np.cumsum, axis=0)
                   A         B         C         D
2022-05-01 -0.063978  0.239891  0.216541 -0.031114
2022-05-02  0.064901  0.196255  0.275400 -0.233739
2022-05-03  0.780894  0.289795 -0.982562 -1.188133
2022-05-04  1.913777 -0.564475 -1.213292 -2.095204
2022-05-05  2.431299  0.751746 -0.890709 -1.843216
2022-05-06  3.065426  1.854051 -0.688547 -1.479128


# Per-column range: the lambda receives each column and returns max minus min.
dataFrame.apply(lambda column: column.max() - column.min())
A    1.196861
B    2.170492
C    1.580545
D    1.318483
dtype: float64

Histogramming

# Ten random integers drawn from 0..6 (the upper bound 7 is exclusive).
series = pd.Series(np.random.randint(low=0, high=7, size=10))
# Tally how often each distinct value occurs, most frequent first.
series.value_counts()
4    4
1    2
0    1
5    1
2    1
3    1
Name: count, dtype: int64

String methods

# Mixed-case strings with one missing value (np.nan) in the middle.
series = pd.Series(
    ["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"]
)
# The .str accessor lower-cases each element; the missing value stays NaN.
series.str.lower()
0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object