Applied Data Analysis in Python
In [1]:
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
# Load the scikit-learn diabetes dataset as pandas objects:
# X is the feature table, y is the target series.
X, y = load_diabetes(return_X_y=True, as_frame=True)
# Preview the first five rows of the feature table.
X.head(5)
Out[1]:
In [2]:
from sklearn.model_selection import train_test_split
# Hold out part of the data for evaluation (scikit-learn's default split
# proportions); the fixed seed makes the split reproducible across runs.
train_X, test_X, train_y, test_y = train_test_split(
    X,
    y,
    random_state=42,
)
# Simple linear regression with an intercept, fitted on the single "bmi" feature.
# The double brackets keep the feature as a one-column DataFrame.
model = LinearRegression(fit_intercept=True)
model.fit(X=train_X[["bmi"]], y=train_y)
Out[2]:
In [3]:
# Evaluate on the held-out rows; for a regressor, `score` reports the
# coefficient of determination (R^2) of the predictions.
model.score(X=test_X[["bmi"]], y=test_y)
Out[3]:
In [4]:
import pandas as pd
# Two-row frame spanning the observed "bmi" range: the fitted model is a
# straight line, so its predictions at the two end points are enough to draw it.
pred = pd.DataFrame({"bmi": [X["bmi"].min(), X["bmi"].max()]})
# Attach the model's prediction at each end point as column "y".
pred = pred.assign(y=model.predict(pred[["bmi"]]))
In [5]:
import seaborn as sns
# Scatter of the target against BMI. `relplot` is a figure-level function that
# returns a FacetGrid, so keep a handle on it instead of relying on pyplot's
# implicit "current axes" to place the overlay.
grid = sns.relplot(data=X, x="bmi", y=y)
# Overlay the fitted regression line (dotted red) on the same axes as the
# scatter by passing the grid's single Axes explicitly.
sns.lineplot(data=pred, x="bmi", y="y", color="red", linestyle=":", ax=grid.ax)
Out[5]: