A few lines of code that I often use with Pandas

A few lines of code that I often use with Pandas

Some lines of code that I use very frequently when coding with Pandas


# Cast a column to another dtype (here: to string, then to integer).
df['col_name'] = df['col_name'].astype(dtype='str')
df['col_name'] = df['col_name'].astype(dtype='int')

# Round every numeric value in the dataframe to 5 decimal places.
df = df.round(decimals=5)

# Inner-join two dataframes whose key columns have different names.
df_merged = df1.merge(df2, how='inner', left_on='key1', right_on='key2')

# Build a dataframe from a dictionary of row dictionaries filled in a for loop.
# `something`, `word` and `index` are placeholders for your own data.
d = {}
for i, x in enumerate(something):
    d[i] = {'word': word, 'index': index}
# orient='index' makes every key of `d` a row label and every inner dict a row.
df = pd.DataFrame.from_dict(d, orient='index')

# Remove rows that contain at least one missing value.
# dropna() returns a new dataframe, so the result has to be reassigned.
# The default how='any' drops a row as soon as one value is missing
# (thresh=1 would instead keep every row that has at least one non-missing value).
df = df.dropna()

# Rename columns in place. Keys are the current labels (0 is an integer label,
# '1' and 'col_name' are string labels); values are the new names.
new_names = {0: 'new_name_1', '1': 'new_name_2', 'col_name': 'new_name_3'}
df.rename(columns=new_names, inplace=True)

# Remove rows whose string column contains a specific character.
# A boolean mask is used rather than drop(index), so rows that merely share an
# index label with a matching row are not removed as well.
# regex=False matches the character literally; na=False treats missing values
# as "no match", so those rows are kept.
df = df[~df['col_name'].str.contains('�', na=False, regex=False)]

# Keep only the rows whose value is strictly greater than 500.
above_500 = df['col_name'] > 500
df = df[above_500]

# Apply a function to every row and store the results in a column.
# axis='columns' (same as axis=1) passes one row at a time to the function.
df['col_name'] = df.apply(lambda record: function_name(record), axis='columns')

# Filter a column against a list of values.
list_terms = ['NOUN', 'PROPN']
matches = df['col_name'].isin(list_terms)
# The two results are alternatives, so they are kept in separate variables:
# applying both filters one after the other would always leave an empty dataframe.
df_without_terms = df[~matches]  # removes the matching rows
df_only_terms = df[matches]  # keeps only the matching rows

# Create an empty dataframe that already has the desired columns.
empty_columns = {name: [] for name in ('col_name_1', 'col_name_2')}
df = pd.DataFrame(empty_columns)

# Add a new column holding the length of each string in another column.
# (Compare the result, e.g. `.str.len() > 2`, to get a boolean flag instead.)
df['col_name_2'] = df['col_name_1'].str.len()

# Convert every string in a column to lowercase.
lowered = df['col_name'].str.lower()
df['col_name'] = lowered

# Add a column that joins two other columns with a single space in between.
left_with_space = df['col_name_1'] + ' '
df['combined'] = left_with_space + df['col_name_2']

# Count the rows in each group of a column.
# reset_index() turns the group labels back into a regular column.
group_sizes = df.groupby(['col_name']).size()
df = group_sizes.reset_index()

# Sort the rows by the values of a column.
# sort_values() returns a new dataframe, so the result has to be reassigned.
df = df.sort_values(by=['word'])

# Parse a date column written as YEAR_MONTH_DAY and keep only the rows that are
# less than 20 days older than the most recent date in the column.
df['date'] = pd.to_datetime(df['date'], format='%Y_%m_%d')
newest_date = df['date'].max()
cutoff_date = newest_date - pd.Timedelta(days=20)
is_recent = df['date'] > cutoff_date
df = df[is_recent]

# After a group-by, collect the values of each group into a list and store
# those lists in a column named 'list'.
values_per_group = df.groupby('col_name')['col_name_2'].apply(list)
df = values_per_group.reset_index(name='list')

# Keep only the rows that belong to groups with more than 10 elements.
def _has_more_than_10(group):
    return group['col_name'].count() > 10


grouped = df.groupby('col_name')
grouped_filtered = grouped.filter(_has_more_than_10)