How To See The Progress Bar Of Read_csv
I'm trying to read a 100 GB CSV file and I want to see a progress bar while the file is being read with file = pd.read_csv('../code/csv/file.csv'), something like =====> 30%. Is there a way to see this?
Solution 1:
import os
import sys

import pandas as pd
from tqdm import tqdm

# Path of the CSV file to load. INPUT_FILENAME must be set by the reader
# before running this snippet (e.g. '../code/csv/file.csv').
fn = INPUT_FILENAME

SAMPLE_ROWS = 20      # rows read up front to estimate the average row size
CHUNKSIZE = 10**5     # rows per chunk handed back by read_csv

# Read a small sample and measure how many characters it occupies as CSV,
# so the total number of chunks can be estimated from the file size alone
# (counting every line of a very large file first would be slow).
temp = pd.read_csv(fn, nrows=SAMPLE_ROWS)
N = len(temp.to_csv(index=False))

# Start with an empty frame that carries the sample's columns, then collect
# the chunks in a list.
df = [temp[:0]]

# Estimated chunk count: (file size / size per sampled row) / rows per chunk.
# The +1 keeps the total at least 1 and rounds up; this is only an estimate.
t = int(os.path.getsize(fn) / N * SAMPLE_ROWS / CHUNKSIZE) + 1

with tqdm(total=t, file=sys.stdout) as pbar:
    for i, chunk in enumerate(pd.read_csv(fn, chunksize=CHUNKSIZE, low_memory=False)):
        df.append(chunk)
        pbar.set_description('Importing: %d' % (1 + i))
        pbar.update(1)

# Concatenate once at the end; DataFrame.append was removed in pandas 2.0.
data = pd.concat(df)
del df
Solution 2:
A fancy output with the typer module, which I have tested in a Jupyter Notebook with a massive delimited text file of 618k rows.
from pathlib import Path

import pandas as pd
import tqdm.auto  # import the submodule explicitly so tqdm.auto.tqdm resolves
import typer

txt = Path("<path-to-massive-delimited-txt-file>").resolve()

# Count the data rows quickly. The file is opened in a context manager so the
# handle is closed, and one line is subtracted for the header that read_csv
# consumes by default. This is a line count, so it is only approximate if
# quoted fields contain embedded newlines.
with open(txt, 'r') as fh:
    length = max(sum(1 for _ in fh) - 1, 0)

# define a chunksize
chunksize = 5000

# Collect chunks in a list and concatenate once at the end; growing a
# DataFrame inside the loop copies all data every iteration, and
# DataFrame.append was removed in pandas 2.0.
chunks = []

# fancy logging with typer
typer.secho(f"Reading file: {txt}", fg="red", bold=True)
typer.secho(f"total rows: {length}", fg="green", bold=True)

# tqdm context
with tqdm.auto.tqdm(total=length, desc="chunks read: ") as bar:
    # enumerate chunks read without low_memory
    # (it is massive for pandas to precisely assign dtypes)
    for i, chunk in enumerate(pd.read_csv(txt, chunksize=chunksize, low_memory=False)):
        # print the chunk number
        print(i)
        # keep the chunk for the final concat
        chunks.append(chunk)
        # advance by the rows actually read; the last chunk may be shorter
        # than chunksize
        bar.update(len(chunk))
        # 6 chunks are enough to test
        if i == 5:
            break

# Build the final frame; fall back to an empty frame when nothing was read.
df = pd.concat(chunks) if chunks else pd.DataFrame()

# finally inform with a friendly message
typer.secho("end of reading chunks...", fg=typer.colors.BRIGHT_RED)
typer.secho(f"Dataframe length:{len(df)}", fg="green", bold=True)
Post a Comment for "How To See The Progress Bar Of Read_csv"