# Load the toy quiz-grade classification data and preview the first rows.
# NOTE(review): relies on `pd` (pandas) already being imported; that import
# is not visible in this part of the file -- confirm.
classification_df = pd.read_csv("data/quiz2-grade-toy-classification.csv")
classification_df.head()

# Split the frame into features and target: every column except "quiz2"
# is used as a feature, and "quiz2" is the label to predict.
X = classification_df.drop(columns=["quiz2"])
y = classification_df["quiz2"]
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree limited to a single level of splits (max_depth=1)
# on the full feature matrix X and target y.
model = DecisionTreeClassifier(max_depth=1)
model.fit(X, y)
# Same data, but allow the tree to grow up to two levels deep.
model2 = DecisionTreeClassifier(max_depth=2)
model2.fit(X, y)
# Same data, but allow the tree to grow up to three levels deep.
model3 = DecisionTreeClassifier(max_depth=3)
model3.fit(X, y)
# Score each fitted tree on the same X, y it was trained on, so these are
# training scores, not estimates of performance on unseen data.
# The bare float after each call appears to be the recorded cell output.

# Depth-1 tree.
model.score(X, y)
0.7619047619047619
# Depth-2 tree.
model2.score(X, y)
0.8571428571428571
# Depth-3 tree.
model3.score(X, y)
0.9523809523809523
# Allow the tree to grow up to five levels deep, then score it on the same
# data it was fit on (a training score, not a generalization estimate).
model4 = DecisionTreeClassifier(max_depth=5)
model4.fit(X, y)
model4.score(X, y)
1.0
# Control tree growth with min_samples_split instead of max_depth: a node
# is only considered for splitting if it holds at least this many samples.
# Here the threshold is 2; the score is computed on the training data.
model5 = DecisionTreeClassifier(min_samples_split=2)
model5.fit(X, y)
model5.score(X, y)
# Require at least 4 samples in a node before it may be split, then score
# the fitted tree on the same data it was trained on.
model6 = DecisionTreeClassifier(min_samples_split=4)
model6.fit(X, y)
model6.score(X, y)
# Require at least 10 samples in a node before it may be split, then score
# the fitted tree on the same data it was trained on.
model7 = DecisionTreeClassifier(min_samples_split=10)
model7.fit(X, y)
model7.score(X, y)
0.9047619047619048
See this link here.