How to share a pandas DataFrame object between processes? (tag: pandas)

How to share pandas DataFrame object between processes?


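You can use a namespace created by a `multiprocessing.Manager`; the following code works as you expect. Note that the child process must assign the modified DataFrame back to the namespace attribute for the change to be visible in the parent.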

# -*- coding: utf-8 -*-
"""Share a pandas DataFrame between processes through a Manager namespace."""
import ctypes  # kept from the original snippet; not used below
import multiprocessing.sharedctypes as sharedctypes  # kept; not used below
from multiprocessing import Manager, Process

import numpy as np
import pandas as pd


def add_new_derived_column(ns):
    """Add ``new_column`` (``A + B / 2``) to the DataFrame stored on ``ns.df``.

    Args:
        ns: Any object exposing a ``df`` attribute that holds a DataFrame
            with columns ``'A'`` and ``'B'`` (here, a Manager namespace proxy).

    The updated frame is printed (first rows only) and written back to
    ``ns.df``.
    """
    # Reading an attribute from a Manager namespace proxy hands back a local
    # copy, so changes made to it are not seen by other processes on their own.
    frame = ns.df
    # Division binds tighter than addition: this is A + (B / 2), not (A + B) / 2.
    frame['new_column'] = frame['A'] + (frame['B'] / 2)
    print(frame.head())
    # Re-assign the attribute so the modified frame is sent back to the manager.
    ns.df = frame


def main():
    """Build a random DataFrame, extend it in a child process, print the result."""
    # The context manager shuts the manager's server process down on exit.
    with Manager() as mgr:
        ns = mgr.Namespace()
        dataframe = pd.DataFrame(np.random.randn(100000, 2), columns=['A', 'B'])
        ns.df = dataframe
        print(dataframe.head())

        # Pass the namespace proxy (not the DataFrame itself) to the child.
        process = Process(target=add_new_derived_column, args=(ns,))
        process.start()
        process.join()

        # Read while the manager is still alive; now includes 'new_column'.
        print(ns.df.head())


if __name__ == "__main__":
    main()