{ "cells": [ { "cell_type": "markdown", "metadata": { "tags": [] }, "source": [ "# Lecture 5: Class demo" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Imports, Announcements, LOs" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "### Imports" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "# import the libraries\n", "import os\n", "import sys\n", "sys.path.append(os.path.join(os.path.abspath(\"../\"), \"code\"))\n", "from plotting_functions import *\n", "from utils import *\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "\n", "from sklearn.compose import make_column_transformer\n", "from sklearn.feature_extraction.text import CountVectorizer\n", "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", "\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.svm import SVC\n", "\n", "%matplotlib inline\n", "\n", "pd.set_option(\"display.max_colwidth\", 200)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Incorporating text features in the Spotify dataset" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "Recall that we dropped the `song_title` feature when we worked with the Spotify dataset in Lab 1. \n", "\n", "Let's try to include it in our pipeline and examine whether we get better results. 
" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "slideshow": { "slide_type": "-" } }, "outputs": [], "source": [ "spotify_df = pd.read_csv(\"../data/spotify.csv\", index_col=0)\n", "X_spotify = spotify_df.drop(columns=[\"target\"])\n", "y_spotify = spotify_df[\"target\"]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(\n", " X_spotify, y_spotify, test_size=0.2, random_state=123\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "(1613, 15)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train.shape" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | acousticness | \n", "danceability | \n", "duration_ms | \n", "energy | \n", "instrumentalness | \n", "key | \n", "liveness | \n", "loudness | \n", "mode | \n", "speechiness | \n", "tempo | \n", "time_signature | \n", "valence | \n", "song_title | \n", "artist | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1505 | \n", "0.004770 | \n", "0.585 | \n", "214740 | \n", "0.614 | \n", "0.000155 | \n", "10 | \n", "0.0762 | \n", "-5.594 | \n", "0 | \n", "0.0370 | \n", "114.059 | \n", "4.0 | \n", "0.2730 | \n", "Cool for the Summer | \n", "Demi Lovato | \n", "
813 | \n", "0.114000 | \n", "0.665 | \n", "216728 | \n", "0.513 | \n", "0.303000 | \n", "0 | \n", "0.1220 | \n", "-7.314 | \n", "1 | \n", "0.3310 | \n", "100.344 | \n", "3.0 | \n", "0.0373 | \n", "Damn Son Where'd You Find This? (feat. Kelly Holiday) - Markus Maximus Remix | \n", "Markus Maximus | \n", "
615 | \n", "0.030200 | \n", "0.798 | \n", "216585 | \n", "0.481 | \n", "0.000000 | \n", "7 | \n", "0.1280 | \n", "-10.488 | \n", "1 | \n", "0.3140 | \n", "127.136 | \n", "4.0 | \n", "0.6400 | \n", "Trill Hoe | \n", "Western Tink | \n", "
319 | \n", "0.106000 | \n", "0.912 | \n", "194040 | \n", "0.317 | \n", "0.000208 | \n", "6 | \n", "0.0723 | \n", "-12.719 | \n", "0 | \n", "0.0378 | \n", "99.346 | \n", "4.0 | \n", "0.9490 | \n", "Who Is He (And What Is He to You?) | \n", "Bill Withers | \n", "
320 | \n", "0.021100 | \n", "0.697 | \n", "236456 | \n", "0.905 | \n", "0.893000 | \n", "6 | \n", "0.1190 | \n", "-7.787 | \n", "0 | \n", "0.0339 | \n", "119.977 | \n", "4.0 | \n", "0.3110 | \n", "Acamar | \n", "Frankey | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2012 | \n", "0.001060 | \n", "0.584 | \n", "274404 | \n", "0.932 | \n", "0.002690 | \n", "1 | \n", "0.1290 | \n", "-3.501 | \n", "1 | \n", "0.3330 | \n", "74.976 | \n", "4.0 | \n", "0.2110 | \n", "Like A Bitch - Kill The Noise Remix | \n", "Kill The Noise | \n", "
1346 | \n", "0.000021 | \n", "0.535 | \n", "203500 | \n", "0.974 | \n", "0.000149 | \n", "10 | \n", "0.2630 | \n", "-3.566 | \n", "0 | \n", "0.1720 | \n", "116.956 | \n", "4.0 | \n", "0.4310 | \n", "Flag of the Beast | \n", "Emmure | \n", "
1406 | \n", "0.503000 | \n", "0.410 | \n", "256333 | \n", "0.648 | \n", "0.000000 | \n", "7 | \n", "0.2190 | \n", "-4.469 | \n", "1 | \n", "0.0362 | \n", "60.391 | \n", "4.0 | \n", "0.3420 | \n", "Don't You Cry For Me | \n", "Cobi | \n", "
1389 | \n", "0.705000 | \n", "0.894 | \n", "222307 | \n", "0.161 | \n", "0.003300 | \n", "4 | \n", "0.3120 | \n", "-14.311 | \n", "1 | \n", "0.0880 | \n", "104.968 | \n", "4.0 | \n", "0.8180 | \n", "장가갈 수 있을까 Can I Get Married? | \n", "Coffeeboy | \n", "
1534 | \n", "0.623000 | \n", "0.470 | \n", "394920 | \n", "0.156 | \n", "0.187000 | \n", "2 | \n", "0.1040 | \n", "-17.036 | \n", "1 | \n", "0.0399 | \n", "118.176 | \n", "4.0 | \n", "0.0591 | \n", "Blue Ballad | \n", "Phil Woods | \n", "
1613 rows × 15 columns
\n", "\n", " | dummy | \n", "
---|---|
fit_time | \n", "0.000 (+/- 0.000) | \n", "
score_time | \n", "0.000 (+/- 0.000) | \n", "
test_score | \n", "0.508 (+/- 0.001) | \n", "
train_score | \n", "0.508 (+/- 0.000) | \n", "
ColumnTransformer(transformers=[('standardscaler', StandardScaler(),\n", " ['acousticness', 'danceability', 'energy',\n", " 'instrumentalness', 'liveness', 'loudness',\n", " 'speechiness', 'tempo', 'valence']),\n", " ('passthrough', 'passthrough', ['mode']),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'),\n", " ['time_signature', 'key'])])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
ColumnTransformer(transformers=[('standardscaler', StandardScaler(),\n", " ['acousticness', 'danceability', 'energy',\n", " 'instrumentalness', 'liveness', 'loudness',\n", " 'speechiness', 'tempo', 'valence']),\n", " ('passthrough', 'passthrough', ['mode']),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'),\n", " ['time_signature', 'key'])])
['acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'valence']
StandardScaler()
['mode']
passthrough
['time_signature', 'key']
OneHotEncoder(handle_unknown='ignore')
\n", " | acousticness | \n", "danceability | \n", "energy | \n", "instrumentalness | \n", "liveness | \n", "loudness | \n", "speechiness | \n", "tempo | \n", "valence | \n", "mode | \n", "... | \n", "key_2 | \n", "key_3 | \n", "key_4 | \n", "key_5 | \n", "key_6 | \n", "key_7 | \n", "key_8 | \n", "key_9 | \n", "key_10 | \n", "key_11 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "-0.697633 | \n", "-0.194548 | \n", "-0.318116 | \n", "-0.492359 | \n", "-0.737898 | \n", "0.395794 | \n", "-0.617752 | \n", "-0.293827 | \n", "-0.908149 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "
1 | \n", "-0.276291 | \n", "0.295726 | \n", "-0.795552 | \n", "0.598355 | \n", "-0.438792 | \n", "-0.052394 | \n", "2.728394 | \n", "-0.802595 | \n", "-1.861238 | \n", "1.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
2 | \n", "-0.599540 | \n", "1.110806 | \n", "-0.946819 | \n", "-0.492917 | \n", "-0.399607 | \n", "-0.879457 | \n", "2.534909 | \n", "0.191274 | \n", "0.575870 | \n", "1.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
3 | \n", "-0.307150 | \n", "1.809445 | \n", "-1.722063 | \n", "-0.492168 | \n", "-0.763368 | \n", "-1.460798 | \n", "-0.608647 | \n", "-0.839616 | \n", "1.825358 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
4 | \n", "-0.634642 | \n", "0.491835 | \n", "1.057468 | \n", "2.723273 | \n", "-0.458384 | \n", "-0.175645 | \n", "-0.653035 | \n", "-0.074294 | \n", "-0.754491 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
1608 | \n", "-0.711944 | \n", "-0.200676 | \n", "1.185100 | \n", "-0.483229 | \n", "-0.393077 | \n", "0.941176 | \n", "2.751157 | \n", "-1.743639 | \n", "-1.158856 | \n", "1.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1609 | \n", "-0.715953 | \n", "-0.500969 | \n", "1.383637 | \n", "-0.492380 | \n", "0.482038 | \n", "0.924239 | \n", "0.918743 | \n", "-0.186361 | \n", "-0.269253 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "
1610 | \n", "1.224228 | \n", "-1.267021 | \n", "-0.157395 | \n", "-0.492917 | \n", "0.194687 | \n", "0.688940 | \n", "-0.626857 | \n", "-2.284681 | \n", "-0.629138 | \n", "1.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1611 | \n", "2.003419 | \n", "1.699134 | \n", "-2.459489 | \n", "-0.481032 | \n", "0.802042 | \n", "-1.875632 | \n", "-0.037298 | \n", "-0.631064 | \n", "1.295640 | \n", "1.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1612 | \n", "1.687114 | \n", "-0.899316 | \n", "-2.483125 | \n", "0.180574 | \n", "-0.556344 | \n", "-2.585697 | \n", "-0.584746 | \n", "-0.141104 | \n", "-1.773086 | \n", "1.0 | \n", "... | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1613 rows × 26 columns
\n", "\n", " | fit_time | \n", "score_time | \n", "test_score | \n", "train_score | \n", "
---|---|---|---|---|
dummy | \n", "0.000 (+/- 0.000) | \n", "0.000 (+/- 0.000) | \n", "0.508 (+/- 0.001) | \n", "0.508 (+/- 0.000) | \n", "
Decision Tree (no_text) | \n", "0.016 (+/- 0.000) | \n", "0.003 (+/- 0.000) | \n", "0.688 (+/- 0.023) | \n", "1.000 (+/- 0.000) | \n", "
KNN (no_text) | \n", "0.005 (+/- 0.001) | \n", "0.015 (+/- 0.020) | \n", "0.676 (+/- 0.028) | \n", "0.788 (+/- 0.009) | \n", "
SVM (no_text) | \n", "0.054 (+/- 0.004) | \n", "0.021 (+/- 0.001) | \n", "0.737 (+/- 0.017) | \n", "0.806 (+/- 0.011) | \n", "
ColumnTransformer(transformers=[('standardscaler', StandardScaler(),\n", " ['acousticness', 'danceability', 'energy',\n", " 'instrumentalness', 'liveness', 'loudness',\n", " 'speechiness', 'tempo', 'valence']),\n", " ('passthrough', 'passthrough', ['mode']),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'),\n", " ['time_signature', 'key']),\n", " ('countvectorizer',\n", " CountVectorizer(stop_words='english'),\n", " 'song_title')])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
ColumnTransformer(transformers=[('standardscaler', StandardScaler(),\n", " ['acousticness', 'danceability', 'energy',\n", " 'instrumentalness', 'liveness', 'loudness',\n", " 'speechiness', 'tempo', 'valence']),\n", " ('passthrough', 'passthrough', ['mode']),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore'),\n", " ['time_signature', 'key']),\n", " ('countvectorizer',\n", " CountVectorizer(stop_words='english'),\n", " 'song_title')])
['acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'valence']
StandardScaler()
['mode']
passthrough
['time_signature', 'key']
OneHotEncoder(handle_unknown='ignore')
song_title
CountVectorizer(stop_words='english')
\n", " | acousticness | \n", "danceability | \n", "energy | \n", "instrumentalness | \n", "liveness | \n", "loudness | \n", "speechiness | \n", "tempo | \n", "valence | \n", "mode | \n", "... | \n", "너와의 | \n", "루시아 | \n", "변명 | \n", "여기 | \n", "이곳에서 | \n", "이대로 | \n", "있어줘요 | \n", "있을까 | \n", "장가갈 | \n", "지금 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "-0.697633 | \n", "-0.194548 | \n", "-0.318116 | \n", "-0.492359 | \n", "-0.737898 | \n", "0.395794 | \n", "-0.617752 | \n", "-0.293827 | \n", "-0.908149 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1 | \n", "-0.276291 | \n", "0.295726 | \n", "-0.795552 | \n", "0.598355 | \n", "-0.438792 | \n", "-0.052394 | \n", "2.728394 | \n", "-0.802595 | \n", "-1.861238 | \n", "1.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
2 | \n", "-0.599540 | \n", "1.110806 | \n", "-0.946819 | \n", "-0.492917 | \n", "-0.399607 | \n", "-0.879457 | \n", "2.534909 | \n", "0.191274 | \n", "0.575870 | \n", "1.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
3 | \n", "-0.307150 | \n", "1.809445 | \n", "-1.722063 | \n", "-0.492168 | \n", "-0.763368 | \n", "-1.460798 | \n", "-0.608647 | \n", "-0.839616 | \n", "1.825358 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
4 | \n", "-0.634642 | \n", "0.491835 | \n", "1.057468 | \n", "2.723273 | \n", "-0.458384 | \n", "-0.175645 | \n", "-0.653035 | \n", "-0.074294 | \n", "-0.754491 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
1608 | \n", "-0.711944 | \n", "-0.200676 | \n", "1.185100 | \n", "-0.483229 | \n", "-0.393077 | \n", "0.941176 | \n", "2.751157 | \n", "-1.743639 | \n", "-1.158856 | \n", "1.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1609 | \n", "-0.715953 | \n", "-0.500969 | \n", "1.383637 | \n", "-0.492380 | \n", "0.482038 | \n", "0.924239 | \n", "0.918743 | \n", "-0.186361 | \n", "-0.269253 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1610 | \n", "1.224228 | \n", "-1.267021 | \n", "-0.157395 | \n", "-0.492917 | \n", "0.194687 | \n", "0.688940 | \n", "-0.626857 | \n", "-2.284681 | \n", "-0.629138 | \n", "1.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1611 | \n", "2.003419 | \n", "1.699134 | \n", "-2.459489 | \n", "-0.481032 | \n", "0.802042 | \n", "-1.875632 | \n", "-0.037298 | \n", "-0.631064 | \n", "1.295640 | \n", "1.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "0.0 | \n", "
1612 | \n", "1.687114 | \n", "-0.899316 | \n", "-2.483125 | \n", "0.180574 | \n", "-0.556344 | \n", "-2.585697 | \n", "-0.584746 | \n", "-0.141104 | \n", "-1.773086 | \n", "1.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1613 rows × 1910 columns
\n", "\n", " | dutchie | \n", "dynamite | \n", "earth | \n", "easy | \n", "
---|---|---|---|---|
380 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "
639 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "
\n", " | fit_time | \n", "score_time | \n", "test_score | \n", "train_score | \n", "
---|---|---|---|---|
dummy | \n", "0.000 (+/- 0.000) | \n", "0.000 (+/- 0.000) | \n", "0.508 (+/- 0.001) | \n", "0.508 (+/- 0.000) | \n", "
Decision Tree (no_text) | \n", "0.016 (+/- 0.000) | \n", "0.003 (+/- 0.000) | \n", "0.688 (+/- 0.023) | \n", "1.000 (+/- 0.000) | \n", "
KNN (no_text) | \n", "0.005 (+/- 0.001) | \n", "0.015 (+/- 0.020) | \n", "0.676 (+/- 0.028) | \n", "0.788 (+/- 0.009) | \n", "
SVM (no_text) | \n", "0.054 (+/- 0.004) | \n", "0.021 (+/- 0.001) | \n", "0.737 (+/- 0.017) | \n", "0.806 (+/- 0.011) | \n", "
Decision Tree (text) | \n", "0.035 (+/- 0.002) | \n", "0.005 (+/- 0.001) | \n", "0.700 (+/- 0.027) | \n", "1.000 (+/- 0.000) | \n", "
KNN (text) | \n", "0.012 (+/- 0.002) | \n", "0.031 (+/- 0.004) | \n", "0.682 (+/- 0.028) | \n", "0.786 (+/- 0.010) | \n", "
SVM (text) | \n", "0.059 (+/- 0.003) | \n", "0.014 (+/- 0.001) | \n", "0.733 (+/- 0.027) | \n", "0.866 (+/- 0.004) | \n", "
\n", " | fit_time | \n", "score_time | \n", "test_score | \n", "train_score | \n", "
---|---|---|---|---|
dummy | \n", "0.000 (+/- 0.000) | \n", "0.000 (+/- 0.000) | \n", "0.508 (+/- 0.001) | \n", "0.508 (+/- 0.000) | \n", "
Decision Tree (no_text) | \n", "0.016 (+/- 0.000) | \n", "0.003 (+/- 0.000) | \n", "0.688 (+/- 0.023) | \n", "1.000 (+/- 0.000) | \n", "
KNN (no_text) | \n", "0.005 (+/- 0.001) | \n", "0.015 (+/- 0.020) | \n", "0.676 (+/- 0.028) | \n", "0.788 (+/- 0.009) | \n", "
SVM (no_text) | \n", "0.054 (+/- 0.004) | \n", "0.021 (+/- 0.001) | \n", "0.737 (+/- 0.017) | \n", "0.806 (+/- 0.011) | \n", "
Decision Tree (text) | \n", "0.035 (+/- 0.002) | \n", "0.005 (+/- 0.001) | \n", "0.700 (+/- 0.027) | \n", "1.000 (+/- 0.000) | \n", "
KNN (text) | \n", "0.012 (+/- 0.002) | \n", "0.031 (+/- 0.004) | \n", "0.682 (+/- 0.028) | \n", "0.786 (+/- 0.010) | \n", "
SVM (text) | \n", "0.059 (+/- 0.003) | \n", "0.014 (+/- 0.001) | \n", "0.733 (+/- 0.027) | \n", "0.866 (+/- 0.004) | \n", "
Decision Tree (all) | \n", "0.028 (+/- 0.001) | \n", "0.005 (+/- 0.001) | \n", "0.684 (+/- 0.035) | \n", "1.000 (+/- 0.000) | \n", "
KNN (all) | \n", "0.012 (+/- 0.000) | \n", "0.026 (+/- 0.001) | \n", "0.681 (+/- 0.032) | \n", "0.792 (+/- 0.008) | \n", "
SVM (all) | \n", "0.052 (+/- 0.004) | \n", "0.013 (+/- 0.000) | \n", "0.741 (+/- 0.027) | \n", "0.833 (+/- 0.006) | \n", "
\n", " | north_america | \n", "eat_out_freq | \n", "age | \n", "n_people | \n", "price | \n", "food_type | \n", "noise_level | \n", "good_server | \n", "comments | \n", "restaurant_name | \n", "target | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "Yes | \n", "3.0 | \n", "29 | \n", "10.0 | \n", "120.0 | \n", "Italian | \n", "medium | \n", "Yes | \n", "Ambience | \n", "NaN | \n", "dislike | \n", "
1 | \n", "Yes | \n", "2.0 | \n", "23 | \n", "3.0 | \n", "20.0 | \n", "Canadian/American | \n", "no music | \n", "No | \n", "food tastes bad | \n", "NaN | \n", "dislike | \n", "
2 | \n", "Yes | \n", "2.0 | \n", "21 | \n", "20.0 | \n", "15.0 | \n", "Chinese | \n", "medium | \n", "Yes | \n", "bad food | \n", "NaN | \n", "dislike | \n", "
3 | \n", "No | \n", "2.0 | \n", "24 | \n", "14.0 | \n", "18.0 | \n", "Other | \n", "medium | \n", "No | \n", "Overall vibe on the restaurant | \n", "NaN | \n", "dislike | \n", "
4 | \n", "Yes | \n", "5.0 | \n", "23 | \n", "30.0 | \n", "20.0 | \n", "Chinese | \n", "medium | \n", "Yes | \n", "A bad day | \n", "NaN | \n", "dislike | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
959 | \n", "No | \n", "10.0 | \n", "22 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "like | \n", "
960 | \n", "Yes | \n", "1.0 | \n", "20 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "like | \n", "
961 | \n", "No | \n", "1.0 | \n", "22 | \n", "40.0 | \n", "50.0 | \n", "Chinese | \n", "medium | \n", "Yes | \n", "The self service sauce table is very clean and the sauces were always filled up. | \n", "Haidilao | \n", "like | \n", "
962 | \n", "Yes | \n", "3.0 | \n", "21 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "like | \n", "
963 | \n", "Yes | \n", "3.0 | \n", "27 | \n", "20.0 | \n", "22.0 | \n", "Other | \n", "medium | \n", "Yes | \n", "Lots of meat that was very soft and tasty. Hearty and amazing broth. Good noodle thickness and consistency | \n", "Uno Beef Noodle | \n", "like | \n", "
964 rows × 11 columns
\n", "\n", " | eat_out_freq | \n", "age | \n", "n_people | \n", "price | \n", "
---|---|---|---|---|
count | \n", "964.000000 | \n", "964.000000 | \n", "6.960000e+02 | \n", "696.000000 | \n", "
mean | \n", "2.585187 | \n", "23.975104 | \n", "1.439254e+04 | \n", "1472.179152 | \n", "
std | \n", "2.246486 | \n", "4.556716 | \n", "3.790481e+05 | \n", "37903.575636 | \n", "
min | \n", "0.000000 | \n", "10.000000 | \n", "-2.000000e+00 | \n", "0.000000 | \n", "
25% | \n", "1.000000 | \n", "21.000000 | \n", "1.000000e+01 | \n", "18.000000 | \n", "
50% | \n", "2.000000 | \n", "22.000000 | \n", "2.000000e+01 | \n", "25.000000 | \n", "
75% | \n", "3.000000 | \n", "26.000000 | \n", "3.000000e+01 | \n", "40.000000 | \n", "
max | \n", "15.000000 | \n", "46.000000 | \n", "1.000000e+07 | \n", "1000000.000000 | \n", "
\n", " | eat_out_freq | \n", "age | \n", "n_people | \n", "price | \n", "
---|---|---|---|---|
count | \n", "942.000000 | \n", "942.000000 | \n", "674.000000 | \n", "674.000000 | \n", "
mean | \n", "2.598057 | \n", "23.992569 | \n", "24.973294 | \n", "34.023279 | \n", "
std | \n", "2.257787 | \n", "4.582570 | \n", "22.016660 | \n", "29.018622 | \n", "
min | \n", "0.000000 | \n", "10.000000 | \n", "1.000000 | \n", "0.000000 | \n", "
25% | \n", "1.000000 | \n", "21.000000 | \n", "10.000000 | \n", "18.000000 | \n", "
50% | \n", "2.000000 | \n", "22.000000 | \n", "20.000000 | \n", "25.000000 | \n", "
75% | \n", "3.000000 | \n", "26.000000 | \n", "30.000000 | \n", "40.000000 | \n", "
max | \n", "15.000000 | \n", "46.000000 | \n", "200.000000 | \n", "200.000000 | \n", "
\n", " | dummy | \n", "
---|---|
fit_time | \n", "0.001 (+/- 0.000) | \n", "
score_time | \n", "0.001 (+/- 0.000) | \n", "
test_score | \n", "0.515 (+/- 0.002) | \n", "
train_score | \n", "0.515 (+/- 0.000) | \n", "
ColumnTransformer(transformers=[('pipeline-1',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " ['age', 'n_people', 'price']),\n", " ('pipeline-2',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(drop='if_binary'))]),\n", " ['good_server']),\n", " ('pipeline-3',...\n", " OrdinalEncoder(categories=[['no '\n", " 'music',\n", " 'low',\n", " 'medium',\n", " 'high',\n", " 'crazy '\n", " 'loud']]))]),\n", " ['noise_level']),\n", " ('pipeline-4',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore',\n", " sparse_output=False))]),\n", " ['north_america', 'food_type']),\n", " ('drop', 'drop',\n", " ['comments', 'restaurant_name',\n", " 'eat_out_freq'])])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
ColumnTransformer(transformers=[('pipeline-1',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='median')),\n", " ('standardscaler',\n", " StandardScaler())]),\n", " ['age', 'n_people', 'price']),\n", " ('pipeline-2',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(drop='if_binary'))]),\n", " ['good_server']),\n", " ('pipeline-3',...\n", " OrdinalEncoder(categories=[['no '\n", " 'music',\n", " 'low',\n", " 'medium',\n", " 'high',\n", " 'crazy '\n", " 'loud']]))]),\n", " ['noise_level']),\n", " ('pipeline-4',\n", " Pipeline(steps=[('simpleimputer',\n", " SimpleImputer(strategy='most_frequent')),\n", " ('onehotencoder',\n", " OneHotEncoder(handle_unknown='ignore',\n", " sparse_output=False))]),\n", " ['north_america', 'food_type']),\n", " ('drop', 'drop',\n", " ['comments', 'restaurant_name',\n", " 'eat_out_freq'])])
['age', 'n_people', 'price']
SimpleImputer(strategy='median')
StandardScaler()
['good_server']
SimpleImputer(strategy='most_frequent')
OneHotEncoder(drop='if_binary')
['noise_level']
SimpleImputer(strategy='most_frequent')
OrdinalEncoder(categories=[['no music', 'low', 'medium', 'high', 'crazy loud']])
['north_america', 'food_type']
SimpleImputer(strategy='most_frequent')
OneHotEncoder(handle_unknown='ignore', sparse_output=False)
['comments', 'restaurant_name', 'eat_out_freq']
drop
\n", " | age | \n", "n_people | \n", "price | \n", "good_server | \n", "noise_level | \n", "north_america_Don't want to share | \n", "north_america_No | \n", "north_america_Yes | \n", "food_type_Canadian/American | \n", "food_type_Chinese | \n", "food_type_Fusion | \n", "food_type_Indian | \n", "food_type_Italian | \n", "food_type_Mexican | \n", "food_type_Other | \n", "food_type_Quebecois | \n", "food_type_Thai | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "-0.669417 | \n", "0.310295 | \n", "-0.368406 | \n", "0.0 | \n", "3.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1 | \n", "-0.669417 | \n", "0.310295 | \n", "-0.054225 | \n", "1.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
2 | \n", "-0.895154 | \n", "0.823364 | \n", "-0.250588 | \n", "1.0 | \n", "2.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
3 | \n", "-0.669417 | \n", "-0.202775 | \n", "-0.250588 | \n", "1.0 | \n", "2.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "
4 | \n", "0.007794 | \n", "-0.202775 | \n", "-0.054225 | \n", "1.0 | \n", "3.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
748 | \n", "0.685006 | \n", "-0.715845 | \n", "-0.643315 | \n", "1.0 | \n", "2.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
749 | \n", "0.007794 | \n", "-0.613231 | \n", "-0.918224 | \n", "1.0 | \n", "2.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "
750 | \n", "-0.895154 | \n", "-0.972379 | \n", "-0.643315 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
751 | \n", "-0.895154 | \n", "-0.202775 | \n", "-0.250588 | \n", "1.0 | \n", "2.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "
752 | \n", "-0.895154 | \n", "1.336434 | \n", "-0.054225 | \n", "1.0 | \n", "3.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
753 rows × 17 columns
\n", "\n", " | fit_time | \n", "score_time | \n", "test_score | \n", "train_score | \n", "
---|---|---|---|---|
dummy | \n", "0.001 (+/- 0.000) | \n", "0.001 (+/- 0.000) | \n", "0.515 (+/- 0.002) | \n", "0.515 (+/- 0.000) | \n", "
Decision Tree (numeric-only) | \n", "0.003 (+/- 0.000) | \n", "0.001 (+/- 0.000) | \n", "0.497 (+/- 0.038) | \n", "0.833 (+/- 0.010) | \n", "
KNN (numeric-only) | \n", "0.003 (+/- 0.001) | \n", "0.004 (+/- 0.000) | \n", "0.525 (+/- 0.034) | \n", "0.674 (+/- 0.015) | \n", "
SVM (numeric-only) | \n", "0.012 (+/- 0.000) | \n", "0.005 (+/- 0.000) | \n", "0.587 (+/- 0.033) | \n", "0.623 (+/- 0.006) | \n", "
\n", " | fit_time | \n", "score_time | \n", "test_score | \n", "train_score | \n", "
---|---|---|---|---|
dummy | \n", "0.001 (+/- 0.000) | \n", "0.001 (+/- 0.000) | \n", "0.515 (+/- 0.002) | \n", "0.515 (+/- 0.000) | \n", "
Decision Tree (numeric-only) | \n", "0.003 (+/- 0.000) | \n", "0.001 (+/- 0.000) | \n", "0.497 (+/- 0.038) | \n", "0.833 (+/- 0.010) | \n", "
KNN (numeric-only) | \n", "0.003 (+/- 0.001) | \n", "0.004 (+/- 0.000) | \n", "0.525 (+/- 0.034) | \n", "0.674 (+/- 0.015) | \n", "
SVM (numeric-only) | \n", "0.012 (+/- 0.000) | \n", "0.005 (+/- 0.000) | \n", "0.587 (+/- 0.033) | \n", "0.623 (+/- 0.006) | \n", "
Decision Tree(non-text feats) | \n", "0.009 (+/- 0.000) | \n", "0.003 (+/- 0.000) | \n", "0.590 (+/- 0.039) | \n", "0.889 (+/- 0.008) | \n", "
KNN(non-text feats) | \n", "0.008 (+/- 0.000) | \n", "0.004 (+/- 0.000) | \n", "0.598 (+/- 0.023) | \n", "0.737 (+/- 0.008) | \n", "
SVM(non-text feats) | \n", "0.019 (+/- 0.000) | \n", "0.008 (+/- 0.000) | \n", "0.687 (+/- 0.011) | \n", "0.733 (+/- 0.008) | \n", "
\n", " | north_america | \n", "eat_out_freq | \n", "age | \n", "n_people | \n", "price | \n", "food_type | \n", "noise_level | \n", "good_server | \n", "comments | \n", "restaurant_name | \n", "
---|---|---|---|---|---|---|---|---|---|---|
80 | \n", "No | \n", "2.0 | \n", "21 | \n", "30.0 | \n", "2200.0 | \n", "Chinese | \n", "high | \n", "No | \n", "The environment was very not clean. The food tasted awful. | \n", "NaN | \n", "
934 | \n", "Yes | \n", "4.0 | \n", "21 | \n", "30.0 | \n", "3000.0 | \n", "Canadian/American | \n", "low | \n", "Yes | \n", "The building and the room gave a very comfy feeling. Immediately after sitting down it felt like we were right at home. | \n", "NaN | \n", "
911 | \n", "No | \n", "4.0 | \n", "20 | \n", "40.0 | \n", "2500.0 | \n", "Canadian/American | \n", "medium | \n", "Yes | \n", "I was hungry | \n", "Chambar | \n", "
459 | \n", "Yes | \n", "5.0 | \n", "21 | \n", "NaN | \n", "NaN | \n", "Quebecois | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
62 | \n", "Yes | \n", "2.0 | \n", "24 | \n", "20.0 | \n", "3000.0 | \n", "Indian | \n", "high | \n", "Yes | \n", "bad taste | \n", "east is east | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
106 | \n", "No | \n", "3.0 | \n", "27 | \n", "10.0 | \n", "1500.0 | \n", "Chinese | \n", "medium | \n", "Yes | \n", "Food wasn't great. | \n", "NaN | \n", "
333 | \n", "No | \n", "1.0 | \n", "24 | \n", "12.0 | \n", "800.0 | \n", "Other | \n", "medium | \n", "Yes | \n", "NaN | \n", "NaN | \n", "
393 | \n", "Yes | \n", "4.0 | \n", "20 | \n", "5.0 | \n", "1500.0 | \n", "Canadian/American | \n", "low | \n", "No | \n", "NaN | \n", "NaN | \n", "
376 | \n", "Yes | \n", "5.0 | \n", "20 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
525 | \n", "Don't want to share | \n", "4.0 | \n", "20 | \n", "50.0 | \n", "3000.0 | \n", "Chinese | \n", "high | \n", "Yes | \n", "NaN | \n", "Haidilao | \n", "
753 rows × 10 columns
\n", "\n", " | fit_time | \n", "score_time | \n", "test_score | \n", "train_score | \n", "
---|---|---|---|---|
dummy | \n", "0.001 (+/- 0.000) | \n", "0.001 (+/- 0.000) | \n", "0.515 (+/- 0.002) | \n", "0.515 (+/- 0.000) | \n", "
Decision Tree (numeric-only) | \n", "0.003 (+/- 0.000) | \n", "0.001 (+/- 0.000) | \n", "0.497 (+/- 0.038) | \n", "0.833 (+/- 0.010) | \n", "
KNN (numeric-only) | \n", "0.003 (+/- 0.001) | \n", "0.004 (+/- 0.000) | \n", "0.525 (+/- 0.034) | \n", "0.674 (+/- 0.015) | \n", "
SVM (numeric-only) | \n", "0.012 (+/- 0.000) | \n", "0.005 (+/- 0.000) | \n", "0.587 (+/- 0.033) | \n", "0.623 (+/- 0.006) | \n", "
Decision Tree(non-text feats) | \n", "0.009 (+/- 0.000) | \n", "0.003 (+/- 0.000) | \n", "0.590 (+/- 0.039) | \n", "0.889 (+/- 0.008) | \n", "
KNN(non-text feats) | \n", "0.008 (+/- 0.000) | \n", "0.004 (+/- 0.000) | \n", "0.598 (+/- 0.023) | \n", "0.737 (+/- 0.008) | \n", "
SVM(non-text feats) | \n", "0.019 (+/- 0.000) | \n", "0.008 (+/- 0.000) | \n", "0.687 (+/- 0.011) | \n", "0.733 (+/- 0.008) | \n", "
Decision Tree(text) | \n", "0.008 (+/- 0.001) | \n", "0.001 (+/- 0.000) | \n", "0.618 (+/- 0.036) | \n", "0.735 (+/- 0.004) | \n", "
KNN(text) | \n", "0.004 (+/- 0.000) | \n", "0.006 (+/- 0.002) | \n", "0.572 (+/- 0.023) | \n", "0.646 (+/- 0.026) | \n", "
SVM(text) | \n", "0.010 (+/- 0.000) | \n", "0.003 (+/- 0.000) | \n", "0.649 (+/- 0.022) | \n", "0.728 (+/- 0.005) | \n", "
\n", " | fit_time | \n", "score_time | \n", "test_score | \n", "train_score | \n", "
---|---|---|---|---|
dummy | \n", "0.001 (+/- 0.000) | \n", "0.001 (+/- 0.000) | \n", "0.515 (+/- 0.002) | \n", "0.515 (+/- 0.000) | \n", "
Decision Tree (numeric-only) | \n", "0.003 (+/- 0.000) | \n", "0.001 (+/- 0.000) | \n", "0.497 (+/- 0.038) | \n", "0.833 (+/- 0.010) | \n", "
KNN (numeric-only) | \n", "0.003 (+/- 0.001) | \n", "0.004 (+/- 0.000) | \n", "0.525 (+/- 0.034) | \n", "0.674 (+/- 0.015) | \n", "
SVM (numeric-only) | \n", "0.012 (+/- 0.000) | \n", "0.005 (+/- 0.000) | \n", "0.587 (+/- 0.033) | \n", "0.623 (+/- 0.006) | \n", "
Decision Tree(non-text feats) | \n", "0.009 (+/- 0.000) | \n", "0.003 (+/- 0.000) | \n", "0.590 (+/- 0.039) | \n", "0.889 (+/- 0.008) | \n", "
KNN(non-text feats) | \n", "0.008 (+/- 0.000) | \n", "0.004 (+/- 0.000) | \n", "0.598 (+/- 0.023) | \n", "0.737 (+/- 0.008) | \n", "
SVM(non-text feats) | \n", "0.019 (+/- 0.000) | \n", "0.008 (+/- 0.000) | \n", "0.687 (+/- 0.011) | \n", "0.733 (+/- 0.008) | \n", "
Decision Tree(text) | \n", "0.008 (+/- 0.001) | \n", "0.001 (+/- 0.000) | \n", "0.618 (+/- 0.036) | \n", "0.735 (+/- 0.004) | \n", "
KNN(text) | \n", "0.004 (+/- 0.000) | \n", "0.006 (+/- 0.002) | \n", "0.572 (+/- 0.023) | \n", "0.646 (+/- 0.026) | \n", "
SVM(text) | \n", "0.010 (+/- 0.000) | \n", "0.003 (+/- 0.000) | \n", "0.649 (+/- 0.022) | \n", "0.728 (+/- 0.005) | \n", "
Decision Tree(all) | \n", "0.016 (+/- 0.001) | \n", "0.005 (+/- 0.001) | \n", "0.624 (+/- 0.022) | \n", "0.893 (+/- 0.006) | \n", "
KNN(all) | \n", "0.013 (+/- 0.000) | \n", "0.012 (+/- 0.001) | \n", "0.625 (+/- 0.027) | \n", "0.748 (+/- 0.015) | \n", "
SVM(all) | \n", "0.023 (+/- 0.000) | \n", "0.008 (+/- 0.001) | \n", "0.699 (+/- 0.017) | \n", "0.786 (+/- 0.008) | \n", "