Below is a logistic regression example using pandas (the model itself is fitted with statsmodels).
# Logistic regression of the banking outcome column `y` on education-level
# indicator (dummy) variables.
#
# Steps:
#   1. Load the banking CSV.
#   2. Merge the three "basic.*" education levels into a single 'Basic' level.
#   3. One-hot encode `education` and join the indicators onto the frame.
#   4. Fit a statsmodels Logit of `y` on four of the indicator columns.
#   5. Print the mean of `y` grouped by each indicator for comparison.

import pandas as pd
import numpy as np
from sklearn import preprocessing
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import seaborn as sns
import statsmodels.api as sm

plt.rc("font", size=14)
sns.set(style="white")
sns.set(style="whitegrid", color_codes=True)

data = pd.read_csv(r"C:\Users\tomva\SynologyDrive\python\pandas\Incomplete\banking.csv", header=0)

# Collapse every "basic.*" schooling level into one 'Basic' category in a
# single pass; all other values are left untouched.
basic_levels = ['basic.9y', 'basic.6y', 'basic.4y']
data['education'] = np.where(
    data['education'].isin(basic_levels), 'Basic', data['education']
)

print(data['education'].unique())
print(data['y'].value_counts())

# One-hot encode each categorical variable and append the indicator columns.
cat_vars = ['education']
for var in cat_vars:
    # dtype=int pins the indicators to 0/1 integers. pandas >= 2.0 defaults
    # to bool, which changes the groupby keys printed below (False/True) and
    # hands a boolean design matrix to the model.
    cat_list = pd.get_dummies(data[var], prefix=var, dtype=int)
    data1 = data.join(cat_list)
    data = data1

# Only these four education indicators are used as regressors; no constant
# term is added, matching the original specification.
cols = [
    'education_Basic',
    'education_high.school',
    'education_professional.course',
    'education_university.degree',
]
X = data[cols]
y = data['y']

logit_model = sm.Logit(y, X)
result = logit_model.fit()
print(result.summary2())

# Mean of y for rows with / without each indicator set.
for col in cols:
    print(data.groupby(col)["y"].mean())