I have a DataFrame using pandas and column labels that I need to edit to replace the original column labels.
I\'d like to change the column names in a DataFrame
Here's a nifty little function I like to use to cut down on typing:
def rename(data, oldnames, newname):
if type(oldnames) == str: #input can be a string or list of strings
oldnames = [oldnames] #when renaming multiple columns
newname = [newname] #make sure you pass the corresponding list of new names
i = 0
for name in oldnames:
oldvar = [c for c in data.columns if name in c]
if len(oldvar) == 0:
raise ValueError("Sorry, couldn't find that column in the dataset")
if len(oldvar) > 1: #doesn't have to be an exact match
print("Found multiple columns that matched " + str(name) + " :")
for c in oldvar:
print(str(oldvar.index(c)) + ": " + str(c))
ind = input('please enter the index of the column you would like to rename: ')
oldvar = oldvar[int(ind)]
if len(oldvar) == 1:
oldvar = oldvar[0]
data = data.rename(columns = {oldvar : newname[i]})
i += 1
return data
Here is an example of how it works:
In [2]: df = pd.DataFrame(np.random.randint(0,10,size=(10, 4)), columns=['col1','col2','omg','idk'])
#first list = existing variables
#second list = new names for those variables
In [3]: df = rename(df, ['col','omg'],['first','ohmy'])
Found multiple columns that matched col :
0: col1
1: col2
please enter the index of the column you would like to rename: 0
In [4]: df.columns
Out[5]: Index(['first', 'col2', 'ohmy', 'idk'], dtype='object')