from itertools import combinations

import numpy as np
import pandas as pd
# Create sample dataset: rows are indexed by (date, ticker) and there is one
# column per ticker, so every date holds a 3x3 block of ticker-vs-ticker values.
# from_product yields the same levels/codes as the explicit constructor and
# works on every pandas version (the old `labels=` keyword was removed in 1.0).
idx = pd.MultiIndex.from_product(
    [[u'2017-1-1', u'2017-1-2'], [u'A', u'B', u'C']],
    names=[u'date', u'ticker'])
df = pd.DataFrame(np.random.randn(6, 3), index=idx, columns=list('ABC'))
# Set every "ticker vs. itself" cell to 1. Row i belongs to ticker i % 3,
# which is also the position of that ticker's column.
# (`range(3) * 2` only worked on Python 2, where range returned a list.)
for row in range(len(df)):
    df.iloc[row, row % 3] = 1
>>> df
A B C
date ticker
2017-1-1 A 1.000000 0.440276 -1.087536
B -0.809949 1.000000 -0.548897
C 0.922866 -0.788699 1.000000
2017-1-2 A 1.000000 -0.106493 0.034319
B 0.080990 1.000000 0.218323
C 0.051651 -0.680358 1.000000
# Unstack and remove duplicates.
# After unstacking the `ticker` index level, each column is keyed by a
# (column ticker, row ticker) tuple and each row is a single date.
tickers = df.columns.tolist()
df = df.unstack().sort_index(axis=1)
# Index.get_values() was removed in pandas 1.0; tolist() on a MultiIndex
# returns the same list of tuples.
pairs = df.columns.tolist()
# Keep each unordered pair once and drop the self-pairs: combinations() yields
# only (a, b) with a ahead of b in `tickers`. Built once as a set so the
# membership test below is O(1) instead of rebuilding a list per column.
wanted = set(combinations(tickers, 2))
mask = [n for n, pair in enumerate(pairs) if pair in wanted]
df.columns = ["{0} vs. {1}".format(*pair) for pair in pairs]
df = df.iloc[:, mask]
>>> df
A vs. B A vs. C B vs. C
date
2017-1-1 -0.809949 0.922866 -0.788699
2017-1-2 0.080990 0.051651 -0.680358
相关问题 更多 >
编程相关推荐