**** Machine learning in Stata via H2O ****

* ---- Session 1: basic random forest and gradient boosting on the FIFA data ----
* Best-effort cleanup of anything left over from an earlier run. capture lets
* the do-file continue when there is no cluster to stop or no log to close.
capture noisily h2o shutdown, force
capture noisily log close

* Work from the webinar folder; the log file name below is relative to it.
cd "C:\Users\Eduardo Garcia\Documents\StataCorp\Presentations\H2OML webinar"
log using ml_webinar_basic, replace

* H2O setup
* Load the data in Stata and create the log of average market value, which is
* the outcome used by the models in this session.
use "https://www.stata.com/users/lil/fifa", clear
generate ln_mkval = ln(average_market_value)

* Start H2O and copy the Stata data into an H2O frame named fifa.
h2o init
_h2oframe put, into(fifa) current

* Convert the categorical columns to H2O enum type and inspect the frame.
_h2oframe toenum position nationality league_rank, replace
_h2oframe describe

* Split fifa into train and test frames (proportions 0.8 and 0.2, rseed 19)
* and make train the current H2O frame.
_h2oframe split fifa, into(train test) split (0.8, 0.2) rseed(19)
_h2oframe change train

* Predictor list shared by the model commands in this session.
global predictors ///
    position age height nationality league_rank ///
    average_minutes_played average_goals_per_game ///
    average_assists_per_game total_yellow_cards team_win_ratio

	 
* Random forest regression of ln_mkval on the predictors global with only the
* seed set. The fit is stored as rf_basic so it can be restored further down.
h2oml rfregress ln_mkval ${predictors}, h2orseed(19)
h2omlest store rf_basic

* Variants of the same model with selected options changed.
h2oml rfregress ln_mkval ${predictors}, h2orseed(19) ///
    predsampvalue(2) minobsleaf(5) maxdepth(10)
h2oml rfregress ln_mkval ${predictors}, h2orseed(19) ///
    binsroot(4) binscont(4)
h2oml rfregress ln_mkval ${predictors}, h2orseed(19) ///
    ntrees(100)

* Go back to the stored rf_basic fit and write its predictions for the test
* frame into the new column ln_mkval_hat.
h2omlest restore rf_basic
h2omlpredict ln_mkval_hat, frame(test)

* Make test the current H2O frame, list observed and predicted values, and
* report metrics for the test frame. The current frame remains test afterwards.
_h2oframe change test
_h2oframe list name ln_mkval ln_mkval_hat
h2omlestat metrics, frame(test)

* Gradient boosting regression of ln_mkval on the predictors global.
* h2oml fits on the current H2O frame, and the listing step earlier in this
* session made test the current frame. Switch back to train first so that the
* models below are not fitted on the same frame they are later predicted on.
_h2oframe change train

* Only the seed set, then a learning rate of 1, then a learning rate of 0.2
* with a learning-rate decay of 0.5.
h2oml gbregress ln_mkval  $predictors, h2orseed(19)
h2oml gbregress ln_mkval  $predictors, h2orseed(19) lrate(1)
h2oml gbregress ln_mkval  $predictors, h2orseed(19) lrate(0.2) lratedecay(0.5)

* Quantile loss with alpha 0.25, 0.5 and 0.75. After each fit, predictions for
* the test frame are written to their own column (suffix 25, 50, 75).
h2oml gbregress ln_mkval  $predictors, h2orseed(19) lrate(0.2) lratedecay(0.5) ///
loss(quantile, alpha(0.25))
h2omlpredict ln_mkval_hat_25, frame(test)
h2oml gbregress ln_mkval  $predictors, h2orseed(19) lrate(0.2) lratedecay(0.5) ///
loss(quantile, alpha(0.5))
h2omlpredict ln_mkval_hat_50, frame(test)
h2oml gbregress ln_mkval  $predictors, h2orseed(19) lrate(0.2) lratedecay(0.5) ///
loss(quantile, alpha(0.75))
h2omlpredict ln_mkval_hat_75, frame(test)

* Make test the current frame to list the three predicted columns side by side.
_h2oframe change test
_h2oframe list name ln_mkval_hat_25 ln_mkval_hat_50 ln_mkval_hat_75


* End of session 1: stop the H2O cluster and close the log.
h2o shutdown, force

log close


* ---- Session 2: hyperparameter tuning ----
clear all

* Close any log that is still open (ignored if none), then start the new log.
capture noisily log close
log using tuning2, replace

* H2O setup
use "https://www.stata-press.com/data/r19/socialpressure", clear
describe

* Start H2O and copy the data into an H2O frame named social.
h2o init
_h2oframe put, into(social) current

* Three-way split into train, valid and test (0.6, 0.3, 0.1; rseed 19);
* train becomes the current H2O frame.
_h2oframe split social, into(train valid test) split (0.6, 0.3, 0.1) rseed(19)
_h2oframe change train

global predictors gender g2000 g2002 p2000 p2002 p2004 treatment age

* Gradient boosting binary classification of voted. Both lrate() and
* lratedecay() are given as the numlist 0.1(0.1)1, and the valid frame is
* passed as the validation frame.
h2oml gbbinclass voted ${predictors}, h2orseed(19) ///
    lrate(0.1(0.1)1) lratedecay(0.1(0.1)1) ///
    validframe(valid)

* Report metrics on the test frame.
h2omlestat metrics, frame(test)

* Stop the cluster before the next example starts its own.
h2o shutdown, force

* Tuning example on the FIFA data: reload the data and rebuild the H2O frames.
use "https://www.stata.com/users/lil/fifa", clear
generate ln_mkval = ln(average_market_value)

h2o init
_h2oframe put, into(fifa) current
_h2oframe toenum position nationality league_rank, replace
_h2oframe describe

* Split into train and test (0.8 and 0.2, rseed 19); train becomes current.
_h2oframe split fifa, into(train test) split (0.8, 0.2) rseed(19)
_h2oframe change train

global predictors ///
    position age height nationality league_rank ///
    average_minutes_played average_goals_per_game ///
    average_assists_per_game total_yellow_cards team_win_ratio

* Random forest regression with ntrees() given as the numlist 50(5)450 and
* the cv(3) option, followed by metrics on the test frame.
h2oml rfregress ln_mkval ${predictors}, h2orseed(19) ///
    ntrees(50(5)450) cv(3)
h2omlestat metrics, frame(test)

* End of session 2: stop the H2O cluster and close the log.
h2o shutdown, force

log close


* ---- Session 3: model explanation graphs ----
clear all

* Close any log that is still open (ignored if none), then start the new log.
capture noisily log close
log using explain, replace

* Load the FIFA data and create the log of average market value.
use "https://www.stata.com/users/lil/fifa", clear
generate ln_mkval = ln(average_market_value)

* Start H2O, copy the data into the fifa frame, convert the categorical
* columns to enum type, and inspect the result.
h2o init
_h2oframe put, into(fifa) current
_h2oframe toenum position nationality league_rank, replace
_h2oframe describe

* Split into train and test (0.8 and 0.2, rseed 19); train becomes current.
_h2oframe split fifa, into(train test) split (0.8, 0.2) rseed(19)
_h2oframe change train

global predictors ///
    position age height nationality league_rank ///
    average_minutes_played average_goals_per_game ///
    average_assists_per_game total_yellow_cards team_win_ratio

* Gradient boosting regression with the cv(10) option. The graphs below are
* drawn from this fit, and each one is exported as a JPEG to the Images folder.
h2oml gbregress ln_mkval ${predictors}, h2orseed(19) cv(10)

* Partial dependence plot for age.
h2omlgraph pdp age
graph export ///
    "C:\Users\Eduardo Garcia\Documents\StataCorp\Presentations\H2OML webinar\Images\pdp.jpg", ///
    as(jpg) name("Graph") quality(100) replace

* Variable importance plot.
h2omlgraph varimp
graph export ///
    "C:\Users\Eduardo Garcia\Documents\StataCorp\Presentations\H2OML webinar\Images\varimp.jpg", ///
    as(jpg) name("Graph") quality(100) replace

* Individual conditional expectation plot for team_win_ratio.
h2omlgraph ice team_win_ratio
graph export ///
    "C:\Users\Eduardo Garcia\Documents\StataCorp\Presentations\H2OML webinar\Images\ice.jpg", ///
    as(jpg) name("Graph") quality(100) replace

* SHAP values for observation 590 of the fifa frame.
h2omlgraph shapvalues, obs(590) title("SHAP values for Kylian Mbappé") frame(fifa)
graph export ///
    "C:\Users\Eduardo Garcia\Documents\StataCorp\Presentations\H2OML webinar\Images\shapvalues.jpg", ///
    as(jpg) name("Graph") quality(100) replace

* SHAP summary plot computed on the fifa frame.
h2omlgraph shapsummary, frame(fifa)
graph export ///
    "C:\Users\Eduardo Garcia\Documents\StataCorp\Presentations\H2OML webinar\Images\shapsummary.jpg", ///
    as(jpg) name("Graph") quality(100) replace

* End of session 3: stop the H2O cluster and close the log.
h2o shutdown, force

log close



