Melt the Upper Triangular Matrix of a Pandas Dataframe Melt the Upper Triangular Matrix of a Pandas Dataframe numpy numpy

Melt the Upper Triangular Matrix of a Pandas Dataframe


First I convert lower values of df to NaN by where and numpy.triu and then stack, reset_index and set column names:

import numpy as npprint df     a    b    ca  1.0  0.5  0.3b  0.5  1.0  0.4c  0.3  0.4  1.0print np.triu(np.ones(df.shape)).astype(np.bool)[[ True  True  True] [False  True  True] [False False  True]]df = df.where(np.triu(np.ones(df.shape)).astype(np.bool))print df    a    b    ca   1  0.5  0.3b NaN  1.0  0.4c NaN  NaN  1.0df = df.stack().reset_index()df.columns = ['Row','Column','Value']print df  Row Column  Value0   a      a    1.01   a      b    0.52   a      c    0.33   b      b    1.04   b      c    0.45   c      c    1.0


Building from solution by @jezrael, boolean indexing would be a more explicit approach:

import numpyfrom pandas import DataFramedf = DataFrame({'a':[1,.5,.3],'b':[.5,1,.4],'c':[.3,.4,1]},index=list('abc'))print df,'\n'keep = np.triu(np.ones(df.shape)).astype('bool').reshape(df.size)print df.stack()[keep]

output:

     a    b    ca  1.0  0.5  0.3b  0.5  1.0  0.4c  0.3  0.4  1.0 a  a    1.0   b    0.5   c    0.3b  b    1.0   c    0.4c  c    1.0dtype: float64


Also buildin on solution by @jezrael, here's a version adding a function to do the inverse operation (from xy to matrix), usefull in my case to work with covariance / correlation matrices.

def matrix_to_xy(df, columns=None, reset_index=False):    bool_index = np.triu(np.ones(df.shape)).astype(bool)    xy = (        df.where(bool_index).stack().reset_index()        if reset_index        else df.where(bool_index).stack()    )    if reset_index:        xy.columns = columns or ["row", "col", "val"]    return xydef xy_to_matrix(xy):    df = xy.pivot(*xy.columns).fillna(0)    df_vals = df.to_numpy()    df = pd.DataFrame(        np.triu(df_vals, 1) + df_vals.T, index=df.index, columns=df.index    )    return dfdf = pd.DataFrame(    {"a": [1, 0.5, 0.3], "b": [0.5, 1, 0.4], "c": [0.3, 0.4, 1]},    index=list("abc"),)print(df)xy = matrix_to_xy(df, reset_index=True)print(xy)mx = xy_to_matrix(xy)print(mx)

output:

     a    b    ca  1.0  0.5  0.3b  0.5  1.0  0.4c  0.3  0.4  1.0  row col  val0   a   a  1.01   a   b  0.52   a   c  0.33   b   b  1.04   b   c  0.45   c   c  1.0row    a    b    crowa    1.0  0.5  0.3b    0.5  1.0  0.4c    0.3  0.4  1.0