Fit dummy regressor, linear regression, and ridge regression models

fit_regressor(
  train_df,
  target_col = NULL,
  numeric_feats = NULL,
  categorical_feats = NULL,
  cv = 5
)

Arguments

train_df

The data frame that will be used to train the models

target_col

The name of the target column to be predicted, as a string

numeric_feats

The names of the numeric feature columns, as a character vector

categorical_feats

The names of the categorical feature columns, as a character vector

cv

The number of cross-validation folds, as an integer (default 5)

Value

A data frame with one row per model and the columns models, Rsquared, and RMSE

Examples

fit_regressor(gapminder::gapminder, target_col = "gdpPercap", numeric_feats = c("pop"), categorical_feats = c("continent"), cv = 5)
#> Warning: There were missing values in resampled performance measures.
#> Warning: prediction from a rank-deficient fit may be misleading
#> Warning: prediction from a rank-deficient fit may be misleading
#> Warning: prediction from a rank-deficient fit may be misleading
#> Warning: prediction from a rank-deficient fit may be misleading
#> Warning: prediction from a rank-deficient fit may be misleading
#> t=100, m=3
#> t=200, m=5
#> t=300, m=3
#> t=400, m=3
#> t=500, m=5
#> t=600, m=3
#> t=700, m=3
#> t=800, m=3
#> t=900, m=4
#> t=100, m=3
#> t=200, m=5
#> t=300, m=4
#> t=400, m=3
#> t=500, m=4
#> t=600, m=4
#> t=700, m=4
#> t=800, m=4
#> t=900, m=4
#> t=100, m=5
#> t=200, m=3
#> t=300, m=4
#> t=400, m=4
#> t=500, m=5
#> t=600, m=3
#> t=700, m=3
#> t=800, m=4
#> t=900, m=4
#> t=100, m=3
#> t=200, m=4
#> t=300, m=3
#> t=400, m=3
#> t=500, m=3
#> t=600, m=3
#> t=700, m=4
#> t=800, m=3
#> t=900, m=3
#> t=100, m=4
#> t=200, m=4
#> t=300, m=4
#> t=400, m=4
#> t=500, m=5
#> t=600, m=3
#> t=700, m=5
#> t=800, m=3
#> t=900, m=3
#> t=100, m=5
#> t=200, m=3
#> t=300, m=4
#> t=400, m=3
#> t=500, m=3
#> t=600, m=5
#> t=700, m=3
#> t=800, m=3
#> t=900, m=3
#>              models  Rsquared     RMSE
#> 1   Dummy regressor       NaN 9758.116
#> 2 Linear regression 0.2561448 8443.195
#> 3             Ridge 0.2379906 8583.318
fit_regressor(gapminder::gapminder, target_col = "gdpPercap", numeric_feats = c("year", "lifeExp", "pop"), categorical_feats = c("continent"), cv = 5)
#> Warning: There were missing values in resampled performance measures.
#> Warning: prediction from a rank-deficient fit may be misleading
#> Warning: prediction from a rank-deficient fit may be misleading
#> Warning: prediction from a rank-deficient fit may be misleading
#> Warning: prediction from a rank-deficient fit may be misleading
#> Warning: prediction from a rank-deficient fit may be misleading
#> t=100, m=5
#> t=200, m=6
#> t=300, m=6
#> t=400, m=6
#> t=500, m=5
#> t=600, m=5
#> t=700, m=5
#> t=800, m=6
#> t=900, m=5
#> t=100, m=7
#> t=200, m=6
#> t=300, m=5
#> t=400, m=6
#> t=500, m=5
#> t=600, m=7
#> t=700, m=6
#> t=800, m=6
#> t=900, m=6
#> t=100, m=6
#> t=200, m=6
#> t=300, m=5
#> t=400, m=7
#> t=500, m=4
#> t=600, m=7
#> t=700, m=6
#> t=800, m=5
#> t=900, m=5
#> t=100, m=6
#> t=200, m=5
#> t=300, m=6
#> t=400, m=6
#> t=500, m=4
#> t=600, m=6
#> t=700, m=6
#> t=800, m=4
#> t=900, m=4
#> t=100, m=4
#> t=200, m=6
#> t=300, m=5
#> t=400, m=5
#> t=500, m=4
#> t=600, m=5
#> t=700, m=3
#> t=800, m=4
#> t=900, m=6
#> t=100, m=5
#> t=200, m=6
#> t=300, m=6
#> t=400, m=6
#> t=500, m=6
#> t=600, m=5
#> t=700, m=6
#> t=800, m=6
#> t=900, m=6
#>              models  Rsquared     RMSE
#> 1   Dummy regressor       NaN 9758.116
#> 2 Linear regression 0.4021298 7596.120
#> 3             Ridge 0.3774341 7768.282