Good night, stack!
I am facing an issue related to pandas library on Python. Im trying to automate a mass append/concat on multiple excel files (with multiple sheets). H
I wrote this simple function to concat excel files and handle excel files with missing work-sheets. feel free to adapt it to your own use-case
the main thing to note is the try
and except
to handle the errors.
import pandas as pd
from pathlib import Path
from xlrd import XLRDError
concat_excels(src,'Sheet2',trg)
No sheet named <'Sheet2'> in file_0.xlsx, skipping
No sheet named <'Sheet2'> in file_1.xlsx, skipping
No sheet named <'Sheet2'> in file_2.xlsx, skipping
No sheet named <'Sheet2'> in file_3.xlsx, skipping
No sheet named <'Sheet2'> in file_4.xlsx, skipping
No sheet named <'Sheet2'> in file_5.xlsx, skipping
No sheet named <'Sheet2'> in file_6.xlsx, skipping
No sheet named <'Sheet2'> in file_7.xlsx, skipping
No sheet named <'Sheet2'> in file_8.xlsx, skipping
No sheet named <'Sheet2'> in file_9.xlsx, skipping
File Saved to C:\Users\DataNovice\OneDrive\Documents\2020\python\file_io_ops\move_files_test
def concat_excels(source_path, sheet_name, target_path):
"""
A simple script to find excel files in a target
location and merge them into a single file.
You need Python installed along with Pandas.
pathlib is available in Python 3.4 +
error handling added.
"""
# create list for excel files.
excel_files = [file for file in Path(source_path).glob("*.xlsx")]
# create empty list to store each individual dataframe.
excel_dataframe = []
# loop through our file to read each file and append it to our list.
for file in excel_files:
try:
df = pd.read_excel(file, sheet_name=sheet_name)
df.columns = df.columns.str.lower() # lowercase all columns
df.columns = (
df.columns.str.strip()
) # remove any trailing or leading white space.
excel_dataframe.append(df)
except XLRDError as err:
print(f"{err} in {file.name}, skipping")
try:
final_dataframe = pd.concat(excel_dataframe, axis=1)
final_dataframe.to_excel(target_path + "\master_file.xlsx", index=False)
print(f"File Saved to {target_path}")
except ValueError as err_2:
print(
f"No Sheets Matched in any of your excel files, are you sure {sheet_name} is correct?"
)
return excel_dataframe