[Case] Monitor storage increase

def getFileSummaryByMonth(directory, path):
    """Summarize total on-disk size per month for the files in *path*.

    Stats every regular file directly inside *path*, buckets file sizes by
    the month of the file's modification time ("YYYY-MM"), and writes the
    per-month totals (newest month first) to a worksheet named *directory*
    in the globally opened Excel ``writer`` (a ``pd.ExcelWriter`` created
    elsewhere in this script).

    Parameters
    ----------
    directory : str
        Worksheet name to use in the shared Excel workbook.
    path : str
        Folder whose immediate files are summarized (subfolders skipped).
    """
    # Collect one record per regular file. Subdirectories are skipped so
    # st_size is always a real file size. The original pushed records into
    # an undefined module-level ``dict_obj`` via ``.add`` (plain dicts have
    # no ``.add``, and a global would leak rows between calls); a local
    # dict keeps each invocation independent.
    records = {}
    index = 0
    for entry in os.listdir(path):
        file_path = os.path.join(path, entry)
        if not os.path.isfile(file_path):
            continue
        info = os.stat(file_path)
        # Month bucket from the modification time, e.g. "2013-06".
        # fromtimestamp() is direct and locale-safe, unlike the original
        # time.ctime() -> strptime() text round-trip.
        month = datetime.datetime.fromtimestamp(info.st_mtime).strftime("%Y-%m")
        records[index] = {
            "Index": index,
            "file_name": file_path,
            "date": month,
            "size": info.st_size,
        }
        print(records[index])
        index += 1

    if not records:
        # Nothing to report for an empty folder; the original crashed here.
        print(f"No files found in {path}; skipping sheet {directory!r}")
        return

    # Convert the collected records to a dataframe, one row per file.
    filestats = pd.DataFrame.from_dict(records, orient="index")
    print(filestats)

    # Aggregate total bytes per unique month.
    filestats["date"] = pd.to_datetime(filestats["date"])
    grouped_by_month_stats = (
        filestats.groupby(filestats["date"].dt.strftime("%Y-%m"))["size"].sum()
    )
    print(grouped_by_month_stats)

    # Newest month first in the report. (The original also pre-sorted by
    # size, which this final sort immediately discarded.)
    df = grouped_by_month_stats.to_frame()
    df.sort_values(by="date", inplace=True, ascending=False)

    # Write each directory's summary to its own worksheet in the shared
    # Excel file held by the global ``writer``.
    df.to_excel(writer, sheet_name=directory)

Usage (requires an open ``pd.ExcelWriter`` bound to the global name ``writer``):

getFileSummaryByMonth('reporting_service', 'C:/first_path/')

getFileSummaryByMonth('archive_service', 'C:/second_path/')

getFileSummaryByMonth('search_service', 'C:/third_path/')

Ref: https://towardsdatascience.com/projecting-and-visualizing-infrastructure-growth-trends-with-python-1e578d9e4260

Last updated