1. for finding and plotting the percentage of null values in each column
# Percentage of null values per column of telecom_data. After reset_index()
# the frame has two columns: 'index' (the original column name) and 0 (the
# percentage of rows in which that column is null).
missing = pd.DataFrame(
    telecom_data.isnull().sum() * 100 / telecom_data.shape[0]
).reset_index()

plt.figure(figsize=(16, 5))
# x and y must be passed by keyword: seaborn >= 0.12 rejects them as
# positional arguments (older versions only emitted a FutureWarning).
ax = sns.pointplot(x='index', y=0, data=missing)
# Column names are long, so rotate the tick labels to keep them readable.
plt.xticks(rotation=90, fontsize=10)
plt.title("% of missing values")
plt.show()
  1. for dropping rows that have a null value
# Remove every row that contains at least one null value; tmp_data is
# modified in place, so nothing is returned.
tmp_data.dropna(axis=0, how="any", inplace=True)
  1. for forming buckets/bins in the values
# Bucket tenure into 12-month groups labelled "1 - 12", "13 - 24", ..., "61 - 72".
labels = [
    "{0} - {1}".format(start, start + 11)
    for start in range(1, 72, 12)
]
# right=False makes each bin left-closed, i.e. [1, 13), [13, 25), ..., [61, 73),
# so the six labels above line up with the six bins.
tmp_data["tenure_group"] = pd.cut(
    tmp_data["tenure"],
    bins=range(1, 80, 12),
    right=False,
    labels=labels,
)
  1. running a SQL query with pandas (`pd.read_sql`)
# Run the query over the existing connection `conn`; pandas returns the rows
# as a DataFrame (not assigned here, so it is only shown in a notebook cell).
pd.read_sql(
    sql='SELECT int_column, date_column FROM test_data',
    con=conn,
)
  1. increasing size of figures
import matplotlib

# Default (width, height) in inches for every figure created after this point.
matplotlib.rcParams.update({'figure.figsize': (12, 6)})
  1. converting value counts into dict
# Map each distinct value in the 'feature' column to its number of occurrences.
df["feature"].value_counts().to_dict()
  1. how to iterate over a list and map each item to its index (ordinal encoding)
# Categories in the order that defines their ordinal rank.
variable = ['A', 'B', 'C', 'D', 'E']
# Map each category to its zero-based position in the list:
# {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4}.
# The closing brace was missing in the original snippet, which made it a SyntaxError.
ordinal_encoding = {category: rank for rank, category in enumerate(variable)}