#########################################

#Machine Learning with R: an introduction

#########################################

############Titanic##########

#automl package

# Suppress warning messages (global option; not restored in this script).
options(warn = -1)

# Attach the h2o package, connect to H2O, and switch off progress bars.
library(h2o)
h2o.init()
h2o.no_progress()

# Load the data, clean it, and convert it to an H2O frame.

library(titanic)
# dplyr provides %>%, mutate() and filter(), which the pipeline below uses.
library(dplyr)

# NOTE(review): `titanic` is not defined anywhere in this script; it is
# presumably a data frame with lower-case columns (pclass, sex, survived, age)
# made available elsewhere -- confirm the object and its column names.
dataset <- titanic %>%
  # Categorical columns are stored as factors before the H2O conversion.
  mutate(
    pclass = factor(pclass),
    sex = factor(sex),
    survived = factor(survived)
  ) %>%
  # Keep only rows with a known age. The earlier `age != is.na(age)` compared
  # each age with a logical (FALSE/TRUE coerced to 0/1): it removed NA rows
  # only as a side effect and would also have dropped any row with age == 0.
  filter(!is.na(age))

df <- as.h2o(dataset)

# Summarise the columns of the H2O frame.
h2o.describe(frame = df)

# Response column and train/test split.

# Name of the outcome column.
y <- "survived"

# Split the frame with ratios = 0.8 and a fixed seed; the first piece is used
# for training, the second for testing.
splits <- h2o.splitFrame(data = df, ratios = 0.8, seed = 1)
train <- splits[[1L]]
test <- splits[[2L]]

# Run AutoML for at most 60 seconds.
# Note: `test` is passed as the leaderboard frame here and is also the frame
# scored by h2o.performance() below, so models are ranked and evaluated on the
# same data.
aml <- h2o.automl(
  y = y,
  training_frame = train,
  leaderboard_frame = test,
  max_runtime_secs = 60,
  seed = 123
)

# Inspect the AutoML result and its leaderboard.
print(aml)
print(aml@leaderboard)

# Performance metrics of the leader model on the test frame.
h2o.performance(model = aml@leader, newdata = test)

# Infogram: fit on the training frame and plot the result.
res <- h2o.infogram(y = y, training_frame = train)
plot(res)

# Variable importance of the leader model: as a table, then as a plot.
h2o.varimp(object = aml@leader)
h2o.varimp_plot(model = aml@leader)
