12.3.10.4.5. Merge data

import pandas as pd
import numpy as np

Concat

Create a DataFrame

# 10 rows x 4 columns of samples drawn from the standard normal distribution;
# rows and columns get the default integer labels.
dataFrame = pd.DataFrame(
    data=np.random.standard_normal(size=(10, 4)),
)

Break it into pieces

# Cut the frame into three consecutive row blocks (rows 0-2, 3-6 and 7-9) ...
pieces = [
    dataFrame.iloc[:3],
    dataFrame.iloc[3:7],
    dataFrame.iloc[7:],
]
# ... and stack them back together row-wise, in the same order.
pd.concat(pieces)
0 1 2 3
0 -0.381824 -0.615609 0.353890 0.073478
1 -0.627941 0.523366 0.763896 -0.858404
2 1.434160 0.695704 0.295234 0.947809
3 -0.318871 -0.470462 -0.500630 -1.794801
4 -1.053845 0.739370 -2.324050 0.441212
5 0.585735 1.022914 0.216315 0.836352
6 -0.770376 -2.096072 0.205964 -0.445026
7 -0.081697 -0.005139 0.267725 -0.149331
8 0.386357 0.231614 1.104906 2.569610
9 0.119584 -1.526672 0.358645 -0.081198


Join

# Both frames carry the same key value twice, so this is a many-to-many join:
# every left row is paired with every right row sharing its key (2 x 2 = 4 rows).
left = pd.DataFrame({"key": ["foo"] * 2, "lval": [1, 2]})
right = pd.DataFrame({"key": ["foo"] * 2, "rval": [4, 5]})
left.merge(right, on="key")
key lval rval
0 foo 1 4
1 foo 1 5
2 foo 2 4
3 foo 2 5


Grouping

# Two label columns (A, B) plus two columns (C, D) of standard-normal samples.
dataFrame = pd.DataFrame(
    dict(
        A=["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
        B=["one", "one", "two", "three", "two", "two", "one", "three"],
        C=np.random.randn(8),
        D=np.random.randn(8),
    )
)
# Sum every remaining column within each group of A. B holds strings, so in the
# output shown below its "sum" is the per-group concatenation of those strings;
# select [["C", "D"]] before .sum() to total only the numeric columns.
dataFrame.groupby("A").sum()
B C D
A
bar onethreetwo -0.312961 0.144192
foo onetwotwoonethree -1.740804 -0.837059


# Group on the (A, B) pair: the result is indexed by both keys and holds the
# per-pair totals of C and D.
dataFrame.groupby(by=["A", "B"]).sum()
C D
A B
bar one 0.096750 0.952195
three 0.157577 -0.027870
two -0.567287 -0.780133
foo one 0.448194 -0.132610
three 0.652331 0.879493
two -2.841329 -1.583942