わからなかったこと
- Pandasで行や列そのものを入れ替えるのは簡単だが、条件を満たす行だけ列の値を入れ替える方法については情報が見つからなかった
import pandas as pd
import numpy as np
from IPython.display import display, HTML
検証内容
- 列test3が1の行について、test2の値をtest1に代入する
# Sample data for the experiment: ten rows where test1 is all 0.0,
# test2 is all -1.0, and test3 is a 0/1 flag marking the rows to rewrite.
hoge = pd.DataFrame({"test1": np.zeros(10)})
fuga = np.full(10, -1.0)
hoge["test2"] = fuga
hoge["test3"] = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
display(hoge)
|
test1 |
test2 |
test3 |
0 |
0.0 |
-1.0 |
0 |
1 |
0.0 |
-1.0 |
1 |
2 |
0.0 |
-1.0 |
0 |
3 |
0.0 |
-1.0 |
1 |
4 |
0.0 |
-1.0 |
0 |
5 |
0.0 |
-1.0 |
1 |
6 |
0.0 |
-1.0 |
0 |
7 |
0.0 |
-1.0 |
1 |
8 |
0.0 |
-1.0 |
0 |
9 |
0.0 |
-1.0 |
0 |
ど直球にやると
# Naive baseline: walk the frame one row at a time, patch the row when its
# flag is set, and append it to a new frame. This is intentionally the
# straightforward (slow) version that the vectorized cells are compared to.
fuga = np.zeros(10)
hoge = pd.DataFrame(fuga, columns=["test1"])
fuga = np.zeros(10)
fuga[:] = -1
hoge["test2"] = fuga
hoge["test3"] = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
display(hoge)

hoge2 = pd.DataFrame()
for i, row in hoge.iterrows():
    # `row` is a Series holding one row; only rows flagged with test3 == 1
    # get test1 overwritten by test2.
    if row["test3"] == 1:
        row["test1"] = row["test2"]
    # Turn the Series back into a one-row frame and append it. Every
    # iteration re-concatenates the whole accumulated result.
    row = pd.DataFrame(row)
    hoge2 = pd.concat([hoge2, row.T])
# The appended rows carry their original labels; renumber from 0.
hoge2 = hoge2.reset_index(drop=True)
hoge2
|
test1 |
test2 |
test3 |
0 |
0.0 |
-1.0 |
0 |
1 |
0.0 |
-1.0 |
1 |
2 |
0.0 |
-1.0 |
0 |
3 |
0.0 |
-1.0 |
1 |
4 |
0.0 |
-1.0 |
0 |
5 |
0.0 |
-1.0 |
1 |
6 |
0.0 |
-1.0 |
0 |
7 |
0.0 |
-1.0 |
1 |
8 |
0.0 |
-1.0 |
0 |
9 |
0.0 |
-1.0 |
0 |
|
test1 |
test2 |
test3 |
0 |
0.0 |
-1.0 |
0.0 |
1 |
-1.0 |
-1.0 |
1.0 |
2 |
0.0 |
-1.0 |
0.0 |
3 |
-1.0 |
-1.0 |
1.0 |
4 |
0.0 |
-1.0 |
0.0 |
5 |
-1.0 |
-1.0 |
1.0 |
6 |
0.0 |
-1.0 |
0.0 |
7 |
-1.0 |
-1.0 |
1.0 |
8 |
0.0 |
-1.0 |
0.0 |
9 |
0.0 |
-1.0 |
0.0 |
問題
- データ件数が少ない場合は、上記の方法でも問題ない
- しかし、多次元(列数の多い)×100万行規模のデータでは、1行ずつ処理するため非常に遅く、1時間待っても処理が終わらない
解決策
- 条件を満たす行のインデックスを取得しておく
- そのインデックスの行だけ、test2の値をtest1にまとめて代入する
# Vectorized solution: find the labels of the flagged rows once, then copy
# test2 into test1 for exactly those rows in a single assignment.
fuga = np.zeros(10)
hoge = pd.DataFrame(fuga, columns=["test1"])
fuga = np.zeros(10)
fuga[:] = -1
hoge["test2"] = fuga
hoge["test3"] = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
display(hoge)

# Index labels of the rows where test3 == 1.
piyo = hoge.index[hoge["test3"] == 1]
# DataFrame.ix was removed in pandas 1.0; .loc is the label-based indexer.
hoge.loc[piyo, "test1"] = hoge.loc[piyo, "test2"]
hoge
|
test1 |
test2 |
test3 |
0 |
0.0 |
-1.0 |
0 |
1 |
0.0 |
-1.0 |
1 |
2 |
0.0 |
-1.0 |
0 |
3 |
0.0 |
-1.0 |
1 |
4 |
0.0 |
-1.0 |
0 |
5 |
0.0 |
-1.0 |
1 |
6 |
0.0 |
-1.0 |
0 |
7 |
0.0 |
-1.0 |
1 |
8 |
0.0 |
-1.0 |
0 |
9 |
0.0 |
-1.0 |
0 |
|
test1 |
test2 |
test3 |
0 |
0.0 |
-1.0 |
0 |
1 |
-1.0 |
-1.0 |
1 |
2 |
0.0 |
-1.0 |
0 |
3 |
-1.0 |
-1.0 |
1 |
4 |
0.0 |
-1.0 |
0 |
5 |
-1.0 |
-1.0 |
1 |
6 |
0.0 |
-1.0 |
0 |
7 |
-1.0 |
-1.0 |
1 |
8 |
0.0 |
-1.0 |
0 |
9 |
0.0 |
-1.0 |
0 |
応用(条件を満たす行でtest1とtest2の値を入れ替える)
# Application: swap test1 and test2 on the flagged rows, using a full copy
# of the frame as the source of the original test1 values.
fuga = np.zeros(10)
hoge = pd.DataFrame(fuga, columns=["test1"])
fuga = np.zeros(10)
fuga[:] = -1
hoge["test2"] = fuga
hoge["test3"] = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
display(hoge)

# Snapshot taken before any write so the old test1 values stay available.
bk_df = hoge.copy()
# Index labels of the rows where test3 == 1.
piyo = hoge.index[hoge["test3"] == 1]
# DataFrame.ix was removed in pandas 1.0; .loc is the label-based indexer.
hoge.loc[piyo, "test1"] = hoge.loc[piyo, "test2"]
hoge.loc[piyo, "test2"] = bk_df.loc[piyo, "test1"]
hoge
|
test1 |
test2 |
test3 |
0 |
0.0 |
-1.0 |
0 |
1 |
0.0 |
-1.0 |
1 |
2 |
0.0 |
-1.0 |
0 |
3 |
0.0 |
-1.0 |
1 |
4 |
0.0 |
-1.0 |
0 |
5 |
0.0 |
-1.0 |
1 |
6 |
0.0 |
-1.0 |
0 |
7 |
0.0 |
-1.0 |
1 |
8 |
0.0 |
-1.0 |
0 |
9 |
0.0 |
-1.0 |
0 |
|
test1 |
test2 |
test3 |
0 |
0.0 |
-1.0 |
0 |
1 |
-1.0 |
0.0 |
1 |
2 |
0.0 |
-1.0 |
0 |
3 |
-1.0 |
0.0 |
1 |
4 |
0.0 |
-1.0 |
0 |
5 |
-1.0 |
0.0 |
1 |
6 |
0.0 |
-1.0 |
0 |
7 |
-1.0 |
0.0 |
1 |
8 |
0.0 |
-1.0 |
0 |
9 |
0.0 |
-1.0 |
0 |
メモリを気にする場合(DataFrame全体をコピーせず、必要な値だけを退避する)
# Memory-conscious swap: instead of copying the whole frame, back up only
# the test1 values of the flagged rows before overwriting them.
fuga = np.zeros(10)
hoge = pd.DataFrame(fuga, columns=["test1"])
fuga = np.zeros(10)
fuga[:] = -1
hoge["test2"] = fuga
hoge["test3"] = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
display(hoge)

# Index labels of the rows where test3 == 1.
piyo = hoge.index[hoge["test3"] == 1]
# DataFrame.ix was removed in pandas 1.0; .loc is the label-based indexer.
# copy=True makes the backup independent of the frame that is written next.
bk_values = hoge.loc[piyo, "test1"].to_numpy(copy=True)
hoge.loc[piyo, "test1"] = hoge.loc[piyo, "test2"]
# A plain array is assigned positionally, in the same row order as piyo.
hoge.loc[piyo, "test2"] = bk_values
hoge
|
test1 |
test2 |
test3 |
0 |
0.0 |
-1.0 |
0 |
1 |
0.0 |
-1.0 |
1 |
2 |
0.0 |
-1.0 |
0 |
3 |
0.0 |
-1.0 |
1 |
4 |
0.0 |
-1.0 |
0 |
5 |
0.0 |
-1.0 |
1 |
6 |
0.0 |
-1.0 |
0 |
7 |
0.0 |
-1.0 |
1 |
8 |
0.0 |
-1.0 |
0 |
9 |
0.0 |
-1.0 |
0 |
|
test1 |
test2 |
test3 |
0 |
0.0 |
-1.0 |
0 |
1 |
-1.0 |
0.0 |
1 |
2 |
0.0 |
-1.0 |
0 |
3 |
-1.0 |
0.0 |
1 |
4 |
0.0 |
-1.0 |
0 |
5 |
-1.0 |
0.0 |
1 |
6 |
0.0 |
-1.0 |
0 |
7 |
-1.0 |
0.0 |
1 |
8 |
0.0 |
-1.0 |
0 |
9 |
0.0 |
-1.0 |
0 |