How can I load a big CSV file in IPython? It seems it cannot be loaded all at once into memory.
You can use the following code to read the file in chunks and distribute the processing of those chunks across multiple processors.
import multiprocessing as mp

import pandas as pd

large_file = "yourfile.csv"
chunksize = 100000  # number of rows handed to a worker at a time


def process_frame(df):
    """Process one chunk of the CSV file and return its row count.

    Replace the body with the real per-chunk work; whatever is returned
    is sent back to the parent process and accumulated there.
    """
    return len(df)


if __name__ == '__main__':
    # With ``chunksize`` set, read_csv returns an iterator of DataFrames
    # instead of loading the whole file into memory at once.
    reader = pd.read_csv(large_file, chunksize=chunksize)

    # The context manager tears the pool down on exit, so every result
    # must be collected inside the ``with`` block.
    with mp.Pool(4) as pool:  # use 4 worker processes
        # Submit each chunk to the pool as soon as it has been read.
        pending = [pool.apply_async(process_frame, [df]) for df in reader]

        result = 0
        for job in pending:
            # Wait at most 10 seconds for each chunk's result.
            result += job.get(timeout=10)

    print("There are %d rows of data" % result)
Comments
Post a Comment