Below, I read a table and apply the ordinary least squares method to it:
import pandas as pd
from sqlalchemy.engine import create_engine
from sqlalchemy.engine import URL
import sqlalchemy as sa
import statsmodels.api as sm
# Table (in the Rapportage schema) that is loaded into a DataFrame below.
table_name = 'MIGRATED_DIVORCE_SETTLEMENT__C'

# Raw string: the SQL Server instance name contains a backslash followed by
# "M", which is not a valid escape sequence in a normal string literal and
# produces a warning on recent Python versions. The resulting value is the
# same as before.
# NOTE(review): credentials are embedded in the source; consider loading them
# from the environment or a secrets store instead.
connection_string = r"DRIVER={ODBC Driver 17 for SQL Server};SERVER=DESKTOP-8J58OIP\MSSQLSERVER_19;DATABASE=Speel;UID=sa;PWD=**"

# Pass the raw ODBC connection string through to pyodbc via SQLAlchemy.
connection_url = URL.create("mssql+pyodbc", query={"odbc_connect": connection_string})
engine = create_engine(connection_url)

# The read must be indented under the ``with`` statement: it is the body of
# the block and needs ``conn`` while the connection is still open.
with engine.begin() as conn:
    df = pd.read_sql_query(sa.text("SELECT * FROM Rapportage." + table_name), conn)
# Keep only the two ID columns, convert them with pd.to_numeric and drop the
# rows that have a missing value in either column.
# NOTE(review): pd.to_numeric raises by default on values it cannot parse;
# pass errors="coerce" if such rows should be dropped instead - confirm intent.
onderzoek = df[['Person_ID_1__c', 'Person_ID_2__c']].apply(pd.to_numeric).dropna()

# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would emit an extra "None" line.
onderzoek.info()
print(onderzoek.head())

y = onderzoek['Person_ID_1__c']  # passed to OLS as the dependent variable
x = onderzoek['Person_ID_2__c']  # passed to OLS as the explanatory variable
# add_constant adds a constant column so the fit includes an intercept term.
x = sm.add_constant(x)
print(type(x))
print(type(y))

model = sm.OLS(y, x).fit()
# view model summary
print(model.summary())
print('**********programma beindigd****')