[Case] Monitor storage increase
def getFileSummaryByMonth(directory, path):
    """Summarize total file size per month for the files directly in *path*
    and write the result to one worksheet of a shared Excel workbook.

    Parameters
    ----------
    directory : str
        Worksheet name to write the summary under (e.g. the service name).
    path : str
        Directory whose files are scanned (non-recursive).

    Returns
    -------
    pandas.DataFrame
        The per-month summary that was written (index: 'YYYY-MM' strings,
        single 'size' column, newest month first). Callers that ignored the
        previous implicit ``None`` return are unaffected.

    Notes
    -----
    Relies on a module-level ``writer`` (a ``pd.ExcelWriter`` opened
    elsewhere in this file) for the actual Excel output.
    """
    records = {}
    for index, entry in enumerate(os.listdir(path)):
        file_path = os.path.join(path, entry)
        # Single stat() call per file: we need mtime and size.
        info = os.stat(file_path)
        # Bucket by modification month directly from the timestamp instead of
        # round-tripping through a locale-dependent ctime string.
        month = datetime.datetime.fromtimestamp(info.st_mtime).strftime("%Y-%m")
        # Key and 'Index' now agree (the original incremented i before storing,
        # so keys were off by one from the recorded Index).
        records[index] = {
            'Index': index,
            'file_name': file_path,
            'date': month,
            'size': info.st_size,
        }

    # Guard: an empty directory yields a DataFrame with no columns, which
    # would make the groupby below raise KeyError.
    filestats = pd.DataFrame.from_dict(records, orient='index')
    if filestats.empty:
        df = pd.DataFrame(columns=['size'])
    else:
        filestats["date"] = pd.to_datetime(filestats["date"])
        # Aggregate total bytes per unique 'YYYY-MM' month.
        monthly = filestats.groupby(filestats['date'].dt.strftime('%Y-%m'))['size'].sum()
        df = monthly.to_frame()
        # Newest month first (the original sorted by size and then re-sorted
        # by date anyway; only the date sort is meaningful).
        df.sort_values(by='date', inplace=True, ascending=False)

    # Each call writes its own worksheet into the same workbook.
    df.to_excel(writer, sheet_name=directory)
    return df
Usage:
# Example usage: one summary worksheet per service, all in the same workbook.
# NOTE(review): assumes a module-level pd.ExcelWriter named `writer` is already
# open, and that these Windows paths exist — confirm before running.
getFileSummaryByMonth('reporting_service', 'C://first_path//')
getFileSummaryByMonth('archive_service', 'C://second_path//')
getFileSummaryByMonth('search_service', 'C://third_path//')
Last updated
Was this helpful?