# Below is a logistic regression example that uses pandas.
# Data handling.
import pandas as pd
import numpy as np
# NOTE(review): `preprocessing` is not referenced in the visible part of this script.
from sklearn import preprocessing
import matplotlib.pyplot as plt
# Set matplotlib's default font size to 14.
plt.rc("font", size=14)
# NOTE(review): LogisticRegression and train_test_split are not referenced in
# the visible part of this script (the fit below uses statsmodels) — confirm
# whether later code needs them.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import seaborn as sns
# NOTE(review): the style is set again on the very next line, so this call
# looks redundant — confirm before removing.
sns.set(style="white")
# Select seaborn's "whitegrid" style, with color_codes=True.
sns.set(style="whitegrid", color_codes=True)
import statsmodels.api as sm

# Load the banking dataset; row 0 of the CSV supplies the column names.
data = pd.read_csv(r"C:\Users\tomva\SynologyDrive\python\pandas\Incomplete\banking.csv", header=0)

# Collapse the three "basic.*" schooling levels into one "Basic" category in a
# single pass instead of three separate np.where rewrites of the column.
basic_levels = ['basic.9y', 'basic.6y', 'basic.4y']
data['education'] = np.where(
    data['education'].isin(basic_levels), 'Basic', data['education']
)
print(data['education'].unique())
print(data['y'].value_counts())

# One-hot encode each categorical variable and append the indicator columns
# (named "<variable>_<level>") to the frame.
cat_vars = ['education']
for var in cat_vars:
    # dtype=int keeps the indicators numeric on every pandas version
    # (pandas >= 2.0 would otherwise produce bool columns).
    cat_list = pd.get_dummies(data[var], prefix=var, dtype=int)
    data = data.join(cat_list)

# Indicator columns used as regressors. No constant term is added, and any
# education levels not listed here are left out of the design matrix.
cols = [
    'education_Basic',
    'education_high.school',
    'education_professional.course',
    'education_university.degree',
]
X = data[cols]
y = data['y']

# Fit a logit model of y on the education indicators and print the summary.
logit_model = sm.Logit(y, X)
result = logit_model.fit()
print(result.summary2())

# For each indicator, print the mean of y among rows with and without that
# education level, as a plain cross-check against the fitted coefficients.
for col in cols:
    print(data.groupby(col)["y"].mean())