<axghjuifriogbhrtjlsdvfs

This commit is contained in:
kubrik 2024-07-19 15:30:19 +02:00
parent 84203724ce
commit 414cdfb536
8 changed files with 164 additions and 471 deletions

Binary file not shown.

View file

@ -7,7 +7,8 @@ import pickle
import optuna
import time
import numpy as np
import cudf
import cupy as cp
def LGBM_GCV(X_train, X_test, y_train, y_test):
@ -45,7 +46,7 @@ def LGBM_GCV(X_train, X_test, y_train, y_test):
return (cv_scores)
def LGBMfit_CV(X_train, X_test, y_train, y_test):
def LGBMfit_Bayesian(X_train, X_test, y_train, y_test):
# Búsqueda bayesiana de hiperparámetros con optuna
# ==============================================================================
@ -100,12 +101,12 @@ def LGBMfit_CV(X_train, X_test, y_train, y_test):
return cv_scores
def XGBfit_CV_score(X_train, X_test, X_val, y_train, y_test, y_val):
def XGBfit_CV_bayesian(X_train, X_test, y_train, y_test):
# Búsqueda bayesiana de hiperparámetros con optuna
# ==============================================================================
def objective(trial):
params = {
'n_estimators': trial.suggest_int('n_estimators', 10, 1000, step=10),
'n_estimators': trial.suggest_int('n_estimators', 1000, 100000, step=10),
'max_depth': trial.suggest_int('max_depth', 3, 12),
'scale_pos_weight': trial.suggest_int('scale_pos_weight', 1, 5),
'learning_rate': trial.suggest_float('learning_rate', 0.00001, 0.01),
@ -122,16 +123,16 @@ def XGBfit_CV_score(X_train, X_test, X_val, y_train, y_test, y_val):
random_state = 42,
verbose = 0,
enable_categorical = True,
multi_strategy = "multi_output_tree",
device = "cuda",
**params
)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
model.fit(cudf.DataFrame.from_pandas(X_train), y_train)
predictions = model.predict(cudf.DataFrame.from_pandas(X_test))
score = mean_squared_error(y_test, predictions, squared=False)
return score
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100, show_progress_bar=True, timeout=100*10)
study.optimize(objective, n_trials=200, show_progress_bar=True, timeout=100*10)
print('Mejores hiperparámetros:', study.best_params)
print('Mejor score:', study.best_value)
@ -145,13 +146,13 @@ def XGBfit_CV_score(X_train, X_test, X_val, y_train, y_test, y_val):
random_state = 42,
verbose = 0,
enable_categorical = True,
multi_strategy = "multi_output_tree",
device = "cuda",
**study.best_params
)
# Entrenamiento & scores modelo
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=42)
cv = RepeatedKFold(n_splits=10, n_repeats=1, random_state=42)
#lightGMB_model.fit(X_train, y_train)
cv_scores = cross_validate(xgb_xgb,
pd.concat([X_train, X_test]), pd.concat([y_train, y_test]), cv = cv,
@ -160,57 +161,48 @@ def XGBfit_CV_score(X_train, X_test, X_val, y_train, y_test, y_val):
return_indices = True)
return (cv_scores)
def XGBfit_CV_score(X_train, X_test, X_val, y_train, y_test, y_val):
def XGBfit_GridS_CV(X_train, X_test, y_train, y_test):
# Búsqueda bayesiana de hiperparámetros con optuna
# ==============================================================================
def objective(trial):
params = {
'n_estimators': trial.suggest_int('n_estimators', 10, 1000, step=10),
'max_depth': trial.suggest_int('max_depth', 3, 12),
'scale_pos_weight': trial.suggest_int('scale_pos_weight', 1, 5),
'learning_rate': trial.suggest_float('learning_rate', 0.00001, 0.01),
'reg_lambda': trial.suggest_float('reg_lambda', 0.0001, 0.1, log=True),
'reg_alpha': trial.suggest_float('reg_alpha', 0.0001, 0.1, log=True),
'colsample_bynode': trial.suggest_float('colsample_bynode', 0.1, 1),
'subsample': trial.suggest_float('subsample', 0.1, 1),
params = {
'n_estimators': [100, 300, 500, 700, 1000],
'max_depth': [3, 5, 7, 12],
'scale_pos_weight': [1, 3, 5],
'learning_rate': [0.000001, 0.001, 0.01, 0.1],
'reg_lambda': [0.0001, 0.001, 0.1],
'reg_alpha': [0.0001, 0.001, 0.1],
'colsample_bynode': [0.001, 0.1, 1],
'subsample': [0.001, 0.1, 1],
}
model = XGBRegressor(
model = XGBRegressor(
tree_method = 'hist',
eval_metric = 'rmse',
n_jobs = -1,
random_state = 42,
verbose = 0,
enable_categorical = True,
multi_strategy = "multi_output_tree",
device = "cuda",
**params
)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
score = mean_squared_error(y_test, predictions, squared=False)
return score
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=42)
grid_search = GridSearchCV(estimator = model, param_grid = params, cv = cv, n_jobs = -1, verbose = 0)
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100, show_progress_bar=True, timeout=100*10)
grid_search.fit(cudf.DataFrame.from_pandas(X_train), y_train)
print('Mejores hiperparámetros:', study.best_params)
print('Mejor score:', study.best_value)
# XGBoost con los mejores hiperparámetros encontrados
# ==============================================================================
xgb_xgb = XGBRegressor(
tree_method = 'hist',
eval_metric = 'rmse',
n_jobs = -1,
random_state = 42,
verbose = 0,
enable_categorical = True,
multi_strategy = "multi_output_tree",
**study.best_params
device = "cuda",
**grid_search.best_params_
)
@ -218,7 +210,9 @@ def XGBfit_CV_score(X_train, X_test, X_val, y_train, y_test, y_val):
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=42)
#lightGMB_model.fit(X_train, y_train)
cv_scores = cross_validate(xgb_xgb,
pd.concat([X_train, X_test]), pd.concat([y_train, y_test]), cv = cv,
x = cudf.DataFrame.from_pandas(pd.concat([X_train, X_test])),
y = pd.concat([y_train, y_test]),
cv = cv,
scoring = ("r2", "neg_mean_absolute_error", "neg_root_mean_squared_error"),
return_estimator = True,
return_indices = True)

View file

@ -8,449 +8,146 @@
"source": [
"import pickle\n",
"\n",
"with open(\"prueba_modelo_2.pkl\", \"rb\") as f:\n",
" cv_scores = pickle.load(f)"
"with open(\"/home/meta_die/Documents/repositories/modelsanoapp/results/test_cases/VA-GG_case/prueba_VA-GG_case_modelo.pkl\", \"rb\") as f:\n",
" cv_scores = pickle.load(f) "
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-container-id-1 {\n",
" /* Definition of color scheme common for light and dark mode */\n",
" --sklearn-color-text: black;\n",
" --sklearn-color-line: gray;\n",
" /* Definition of color scheme for unfitted estimators */\n",
" --sklearn-color-unfitted-level-0: #fff5e6;\n",
" --sklearn-color-unfitted-level-1: #f6e4d2;\n",
" --sklearn-color-unfitted-level-2: #ffe0b3;\n",
" --sklearn-color-unfitted-level-3: chocolate;\n",
" /* Definition of color scheme for fitted estimators */\n",
" --sklearn-color-fitted-level-0: #f0f8ff;\n",
" --sklearn-color-fitted-level-1: #d4ebff;\n",
" --sklearn-color-fitted-level-2: #b3dbfd;\n",
" --sklearn-color-fitted-level-3: cornflowerblue;\n",
"\n",
" /* Specific color for light theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
" --sklearn-color-icon: #696969;\n",
"\n",
" @media (prefers-color-scheme: dark) {\n",
" /* Redefinition of color scheme for dark theme */\n",
" --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
" --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
" --sklearn-color-icon: #878787;\n",
" }\n",
"}\n",
"\n",
"#sk-container-id-1 {\n",
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"#sk-container-id-1 pre {\n",
" padding: 0;\n",
"}\n",
"\n",
"#sk-container-id-1 input.sk-hidden--visually {\n",
" border: 0;\n",
" clip: rect(1px 1px 1px 1px);\n",
" clip: rect(1px, 1px, 1px, 1px);\n",
" height: 1px;\n",
" margin: -1px;\n",
" overflow: hidden;\n",
" padding: 0;\n",
" position: absolute;\n",
" width: 1px;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-dashed-wrapped {\n",
" border: 1px dashed var(--sklearn-color-line);\n",
" margin: 0 0.4em 0.5em 0.4em;\n",
" box-sizing: border-box;\n",
" padding-bottom: 0.4em;\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-container {\n",
" /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
" but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
" so we also need the `!important` here to be able to override the\n",
" default hidden behavior on the sphinx rendered scikit-learn.org.\n",
" See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
" display: inline-block !important;\n",
" position: relative;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-text-repr-fallback {\n",
" display: none;\n",
"}\n",
"\n",
"div.sk-parallel-item,\n",
"div.sk-serial,\n",
"div.sk-item {\n",
" /* draw centered vertical line to link estimators */\n",
" background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
" background-size: 2px 100%;\n",
" background-repeat: no-repeat;\n",
" background-position: center center;\n",
"}\n",
"\n",
"/* Parallel-specific style estimator block */\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item::after {\n",
" content: \"\";\n",
" width: 100%;\n",
" border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
" flex-grow: 1;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel {\n",
" display: flex;\n",
" align-items: stretch;\n",
" justify-content: center;\n",
" background-color: var(--sklearn-color-background);\n",
" position: relative;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item {\n",
" display: flex;\n",
" flex-direction: column;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
" align-self: flex-end;\n",
" width: 50%;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
" align-self: flex-start;\n",
" width: 50%;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
" width: 0;\n",
"}\n",
"\n",
"/* Serial-specific style estimator block */\n",
"\n",
"#sk-container-id-1 div.sk-serial {\n",
" display: flex;\n",
" flex-direction: column;\n",
" align-items: center;\n",
" background-color: var(--sklearn-color-background);\n",
" padding-right: 1em;\n",
" padding-left: 1em;\n",
"}\n",
"\n",
"\n",
"/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
"clickable and can be expanded/collapsed.\n",
"- Pipeline and ColumnTransformer use this feature and define the default style\n",
"- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
"*/\n",
"\n",
"/* Pipeline and ColumnTransformer style (default) */\n",
"\n",
"#sk-container-id-1 div.sk-toggleable {\n",
" /* Default theme specific background. It is overwritten whether we have a\n",
" specific estimator or a Pipeline/ColumnTransformer */\n",
" background-color: var(--sklearn-color-background);\n",
"}\n",
"\n",
"/* Toggleable label */\n",
"#sk-container-id-1 label.sk-toggleable__label {\n",
" cursor: pointer;\n",
" display: block;\n",
" width: 100%;\n",
" margin-bottom: 0;\n",
" padding: 0.5em;\n",
" box-sizing: border-box;\n",
" text-align: center;\n",
"}\n",
"\n",
"#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
" /* Arrow on the left of the label */\n",
" content: \"▸\";\n",
" float: left;\n",
" margin-right: 0.25em;\n",
" color: var(--sklearn-color-icon);\n",
"}\n",
"\n",
"#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
" color: var(--sklearn-color-text);\n",
"}\n",
"\n",
"/* Toggleable content - dropdown */\n",
"\n",
"#sk-container-id-1 div.sk-toggleable__content {\n",
" max-height: 0;\n",
" max-width: 0;\n",
" overflow: hidden;\n",
" text-align: left;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-toggleable__content pre {\n",
" margin: 0.2em;\n",
" border-radius: 0.25em;\n",
" color: var(--sklearn-color-text);\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
" /* Expand drop-down */\n",
" max-height: 200px;\n",
" max-width: 100%;\n",
" overflow: auto;\n",
"}\n",
"\n",
"#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
" content: \"▾\";\n",
"}\n",
"\n",
"/* Pipeline/ColumnTransformer-specific style */\n",
"\n",
"#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator-specific style */\n",
"\n",
"/* Colorize estimator box */\n",
"#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
"#sk-container-id-1 div.sk-label label {\n",
" /* The background is the default theme color */\n",
" color: var(--sklearn-color-text-on-default-background);\n",
"}\n",
"\n",
"/* On hover, darken the color of the background */\n",
"#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"/* Label box, darken color on hover, fitted */\n",
"#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
" color: var(--sklearn-color-text);\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Estimator label */\n",
"\n",
"#sk-container-id-1 div.sk-label label {\n",
" font-family: monospace;\n",
" font-weight: bold;\n",
" display: inline-block;\n",
" line-height: 1.2em;\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-label-container {\n",
" text-align: center;\n",
"}\n",
"\n",
"/* Estimator-specific */\n",
"#sk-container-id-1 div.sk-estimator {\n",
" font-family: monospace;\n",
" border: 1px dotted var(--sklearn-color-border-box);\n",
" border-radius: 0.25em;\n",
" box-sizing: border-box;\n",
" margin-bottom: 0.5em;\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-0);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-estimator.fitted {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-0);\n",
"}\n",
"\n",
"/* on hover */\n",
"#sk-container-id-1 div.sk-estimator:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-2);\n",
"}\n",
"\n",
"#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-2);\n",
"}\n",
"\n",
"/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
"\n",
"/* Common style for \"i\" and \"?\" */\n",
"\n",
".sk-estimator-doc-link,\n",
"a:link.sk-estimator-doc-link,\n",
"a:visited.sk-estimator-doc-link {\n",
" float: right;\n",
" font-size: smaller;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1em;\n",
" height: 1em;\n",
" width: 1em;\n",
" text-decoration: none !important;\n",
" margin-left: 1ex;\n",
" /* unfitted */\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted,\n",
"a:link.sk-estimator-doc-link.fitted,\n",
"a:visited.sk-estimator-doc-link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
".sk-estimator-doc-link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover,\n",
"div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
".sk-estimator-doc-link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"/* Span, style for the box shown on hovering the info icon */\n",
".sk-estimator-doc-link span {\n",
" display: none;\n",
" z-index: 9999;\n",
" position: relative;\n",
" font-weight: normal;\n",
" right: .2ex;\n",
" padding: .5ex;\n",
" margin: .5ex;\n",
" width: min-content;\n",
" min-width: 20ex;\n",
" max-width: 50ex;\n",
" color: var(--sklearn-color-text);\n",
" box-shadow: 2pt 2pt 4pt #999;\n",
" /* unfitted */\n",
" background: var(--sklearn-color-unfitted-level-0);\n",
" border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link.fitted span {\n",
" /* fitted */\n",
" background: var(--sklearn-color-fitted-level-0);\n",
" border: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"\n",
".sk-estimator-doc-link:hover span {\n",
" display: block;\n",
"}\n",
"\n",
"/* \"?\"-specific style due to the `<a>` HTML tag */\n",
"\n",
"#sk-container-id-1 a.estimator_doc_link {\n",
" float: right;\n",
" font-size: 1rem;\n",
" line-height: 1em;\n",
" font-family: monospace;\n",
" background-color: var(--sklearn-color-background);\n",
" border-radius: 1rem;\n",
" height: 1rem;\n",
" width: 1rem;\n",
" text-decoration: none;\n",
" /* unfitted */\n",
" color: var(--sklearn-color-unfitted-level-1);\n",
" border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
"}\n",
"\n",
"#sk-container-id-1 a.estimator_doc_link.fitted {\n",
" /* fitted */\n",
" border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
" color: var(--sklearn-color-fitted-level-1);\n",
"}\n",
"\n",
"/* On hover */\n",
"#sk-container-id-1 a.estimator_doc_link:hover {\n",
" /* unfitted */\n",
" background-color: var(--sklearn-color-unfitted-level-3);\n",
" color: var(--sklearn-color-background);\n",
" text-decoration: none;\n",
"}\n",
"\n",
"#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
" /* fitted */\n",
" background-color: var(--sklearn-color-fitted-level-3);\n",
"}\n",
"</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LGBMRegressor(learning_rate=0.008070433979340683, max_depth=2, n_estimators=410,\n",
" n_jobs=-1, num_leaves=1550, random_state=42,\n",
" subsample=0.30662103796774687, verbose=0)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;LGBMRegressor<span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>LGBMRegressor(learning_rate=0.008070433979340683, max_depth=2, n_estimators=410,\n",
" n_jobs=-1, num_leaves=1550, random_state=42,\n",
" subsample=0.30662103796774687, verbose=0)</pre></div> </div></div></div></div>"
],
"text/plain": [
"LGBMRegressor(learning_rate=0.008070433979340683, max_depth=2, n_estimators=410,\n",
" n_jobs=-1, num_leaves=1550, random_state=42,\n",
" subsample=0.30662103796774687, verbose=0)"
"-0.10651202704048467"
]
},
"execution_count": 8,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cv_scores[\"estimator\"][0].predict()"
"import numpy as np\n",
"np.mean(cv_scores[\"test_neg_mean_absolute_error\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": []
"source": [
"import cupy as cp\n",
"import osdump\n",
"import pandas as pd\n",
"from utils import preproc_2\n",
"import cudf\n",
"import numpy as np\n",
"from sklearn.experimental import enable_iterative_imputer\n",
"from sklearn.impute import IterativeImputer\n",
"from xgboost import XGBRegressor"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Column must have no nulls.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[3], line 8\u001b[0m\n\u001b[1;32m 6\u001b[0m features \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mN.G_plasm_pre\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mN_pre\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mN.G_urine_pre\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mN.GG_pre\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mN.S_pre\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTotal.N_pre\u001b[39m\u001b[38;5;124m'\u001b[39m] \n\u001b[1;32m 7\u001b[0m featplustargets \u001b[38;5;241m=\u001b[39m features \u001b[38;5;241m+\u001b[39m targets \n\u001b[0;32m----> 8\u001b[0m to_imput \u001b[38;5;241m=\u001b[39m \u001b[43mmain_df\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmain_df\u001b[49m\u001b[43m[\u001b[49m\u001b[43mfeatplustargets\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mselect_dtypes\u001b[49m\u001b[43m(\u001b[49m\u001b[43minclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnumber\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_cupy\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 10\u001b[0m iimp \u001b[38;5;241m=\u001b[39m IterativeImputer(\n\u001b[1;32m 11\u001b[0m estimator \u001b[38;5;241m=\u001b[39m XGBRegressor(device \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 12\u001b[0m random_state \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m42\u001b[39m,\n\u001b[1;32m 13\u001b[0m verbose \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m,\n\u001b[1;32m 14\u001b[0m )\n\u001b[1;32m 16\u001b[0m 
iimp\u001b[38;5;241m.\u001b[39mset_output(transform\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpandas\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[0;32m~/main-venv/lib/python3.11/site-packages/nvtx/nvtx.py:116\u001b[0m, in \u001b[0;36mannotate.__call__.<locals>.inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(func)\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 115\u001b[0m libnvtx_push_range(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mattributes, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdomain\u001b[38;5;241m.\u001b[39mhandle)\n\u001b[0;32m--> 116\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 117\u001b[0m libnvtx_pop_range(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdomain\u001b[38;5;241m.\u001b[39mhandle)\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n",
"File \u001b[0;32m~/main-venv/lib/python3.11/site-packages/cudf/core/frame.py:512\u001b[0m, in \u001b[0;36mFrame.to_cupy\u001b[0;34m(self, dtype, copy, na_value)\u001b[0m\n\u001b[1;32m 486\u001b[0m \u001b[38;5;129m@_cudf_nvtx_annotate\u001b[39m\n\u001b[1;32m 487\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mto_cupy\u001b[39m(\n\u001b[1;32m 488\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 491\u001b[0m na_value\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 492\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m cupy\u001b[38;5;241m.\u001b[39mndarray:\n\u001b[1;32m 493\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Convert the Frame to a CuPy array.\u001b[39;00m\n\u001b[1;32m 494\u001b[0m \n\u001b[1;32m 495\u001b[0m \u001b[38;5;124;03m Parameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 510\u001b[0m \u001b[38;5;124;03m cupy.ndarray\u001b[39;00m\n\u001b[1;32m 511\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 512\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_to_array\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 513\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mcol\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mcol\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 514\u001b[0m \u001b[43m \u001b[49m\u001b[43mcupy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 515\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 516\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 517\u001b[0m \u001b[43m \u001b[49m\u001b[43mna_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 518\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/main-venv/lib/python3.11/site-packages/nvtx/nvtx.py:116\u001b[0m, in \u001b[0;36mannotate.__call__.<locals>.inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(func)\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minner\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 115\u001b[0m libnvtx_push_range(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mattributes, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdomain\u001b[38;5;241m.\u001b[39mhandle)\n\u001b[0;32m--> 116\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 117\u001b[0m libnvtx_pop_range(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdomain\u001b[38;5;241m.\u001b[39mhandle)\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result\n",
"File \u001b[0;32m~/main-venv/lib/python3.11/site-packages/cudf/core/frame.py:477\u001b[0m, in \u001b[0;36mFrame._to_array\u001b[0;34m(self, get_array, module, copy, dtype, na_value)\u001b[0m\n\u001b[1;32m 470\u001b[0m matrix \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39mempty(\n\u001b[1;32m 471\u001b[0m shape\u001b[38;5;241m=\u001b[39m(\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m), ncol), dtype\u001b[38;5;241m=\u001b[39mdtype, order\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mF\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 472\u001b[0m )\n\u001b[1;32m 473\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, col \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_data\u001b[38;5;241m.\u001b[39mvalues()):\n\u001b[1;32m 474\u001b[0m \u001b[38;5;66;03m# TODO: col.values may fail if there is nullable data or an\u001b[39;00m\n\u001b[1;32m 475\u001b[0m \u001b[38;5;66;03m# unsupported dtype. We may want to catch and provide a more\u001b[39;00m\n\u001b[1;32m 476\u001b[0m \u001b[38;5;66;03m# suitable error.\u001b[39;00m\n\u001b[0;32m--> 477\u001b[0m matrix[:, i] \u001b[38;5;241m=\u001b[39m \u001b[43mto_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcol\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 478\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m matrix\n",
"File \u001b[0;32m~/main-venv/lib/python3.11/site-packages/cudf/core/frame.py:439\u001b[0m, in \u001b[0;36mFrame._to_array.<locals>.to_array\u001b[0;34m(col, dtype)\u001b[0m\n\u001b[1;32m 437\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m na_value \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 438\u001b[0m col \u001b[38;5;241m=\u001b[39m col\u001b[38;5;241m.\u001b[39mfillna(na_value)\n\u001b[0;32m--> 439\u001b[0m array \u001b[38;5;241m=\u001b[39m \u001b[43mget_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcol\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 440\u001b[0m casted_array \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39masarray(array, dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[1;32m 441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m copy \u001b[38;5;129;01mand\u001b[39;00m casted_array \u001b[38;5;129;01mis\u001b[39;00m array:\n\u001b[1;32m 442\u001b[0m \u001b[38;5;66;03m# Don't double copy after asarray\u001b[39;00m\n",
"File \u001b[0;32m~/main-venv/lib/python3.11/site-packages/cudf/core/frame.py:513\u001b[0m, in \u001b[0;36mFrame.to_cupy.<locals>.<lambda>\u001b[0;34m(col)\u001b[0m\n\u001b[1;32m 486\u001b[0m \u001b[38;5;129m@_cudf_nvtx_annotate\u001b[39m\n\u001b[1;32m 487\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mto_cupy\u001b[39m(\n\u001b[1;32m 488\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 491\u001b[0m na_value\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 492\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m cupy\u001b[38;5;241m.\u001b[39mndarray:\n\u001b[1;32m 493\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Convert the Frame to a CuPy array.\u001b[39;00m\n\u001b[1;32m 494\u001b[0m \n\u001b[1;32m 495\u001b[0m \u001b[38;5;124;03m Parameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 510\u001b[0m \u001b[38;5;124;03m cupy.ndarray\u001b[39;00m\n\u001b[1;32m 511\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 512\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_to_array(\n\u001b[0;32m--> 513\u001b[0m \u001b[38;5;28;01mlambda\u001b[39;00m col: \u001b[43mcol\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m,\n\u001b[1;32m 514\u001b[0m cupy,\n\u001b[1;32m 515\u001b[0m copy,\n\u001b[1;32m 516\u001b[0m dtype,\n\u001b[1;32m 517\u001b[0m na_value,\n\u001b[1;32m 518\u001b[0m )\n",
"File \u001b[0;32m~/main-venv/lib/python3.11/site-packages/cudf/core/column/column.py:244\u001b[0m, in \u001b[0;36mColumnBase.values\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cupy\u001b[38;5;241m.\u001b[39marray([], dtype\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdtype)\n\u001b[1;32m 243\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhas_nulls():\n\u001b[0;32m--> 244\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mColumn must have no nulls.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 246\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cupy\u001b[38;5;241m.\u001b[39masarray(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_array_view(mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwrite\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n",
"\u001b[0;31mValueError\u001b[0m: Column must have no nulls."
]
}
],
"source": [
"main_df = pd.read_csv(os.path.join(\"..\", \"data\", \"final_merge.csv\"), sep =\";\", decimal= \".\")\n",
"main_df[['Sex', 'Sweetener']] = main_df[['Sex', 'Sweetener']].astype(\"category\")\n",
"\n",
"\n",
"targets = ['N.G_urine_post']\n",
"features = ['N.G_plasm_pre', 'N_pre','N.G_urine_pre', 'N.GG_pre', 'N.S_pre', 'Total.N_pre'] \n",
"featplustargets = features + targets \n",
"to_imput = main_df[main_df[featplustargets].select_dtypes(include=np.number).columns]\n",
"\n",
"iimp = IterativeImputer(\n",
"estimator = XGBRegressor(device = \"cuda\"),\n",
"random_state = 42,\n",
"verbose = 0,\n",
")\n",
"\n",
"iimp.set_output(transform=\"pandas\")\n",
"\n",
"df_imp = iimp.fit_transform(to_imput)\n",
"\n",
"#X_train, X_test, y_train, y_test = preproc_2(df=main_df, targets=targets, features=features)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"ename": "RuntimeError",
"evalue": "CuPy failed to load libnvrtc.so.12: OSError: libnvrtc.so.12: cannot open shared object file: No such file or directory",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32mcupy_backends/cuda/_softlink.pyx:25\u001b[0m, in \u001b[0;36mcupy_backends.cuda._softlink.SoftLink.__init__\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m/usr/lib/python3.11/ctypes/__init__.py:376\u001b[0m, in \u001b[0;36mCDLL.__init__\u001b[0;34m(self, name, mode, handle, use_errno, use_last_error, winmode)\u001b[0m\n\u001b[1;32m 375\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m handle \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 376\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_handle \u001b[38;5;241m=\u001b[39m \u001b[43m_dlopen\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 377\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
"\u001b[0;31mOSError\u001b[0m: libnvrtc.so.12: cannot open shared object file: No such file or directory",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m~/main-venv/lib/python3.11/site-packages/IPython/core/formatters.py:711\u001b[0m, in \u001b[0;36mPlainTextFormatter.__call__\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 704\u001b[0m stream \u001b[38;5;241m=\u001b[39m StringIO()\n\u001b[1;32m 705\u001b[0m printer \u001b[38;5;241m=\u001b[39m pretty\u001b[38;5;241m.\u001b[39mRepresentationPrinter(stream, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose,\n\u001b[1;32m 706\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_width, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnewline,\n\u001b[1;32m 707\u001b[0m max_seq_length\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_seq_length,\n\u001b[1;32m 708\u001b[0m singleton_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msingleton_printers,\n\u001b[1;32m 709\u001b[0m type_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtype_printers,\n\u001b[1;32m 710\u001b[0m deferred_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdeferred_printers)\n\u001b[0;32m--> 711\u001b[0m \u001b[43mprinter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpretty\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 712\u001b[0m printer\u001b[38;5;241m.\u001b[39mflush()\n\u001b[1;32m 713\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m stream\u001b[38;5;241m.\u001b[39mgetvalue()\n",
"File \u001b[0;32m~/main-venv/lib/python3.11/site-packages/IPython/lib/pretty.py:419\u001b[0m, in \u001b[0;36mRepresentationPrinter.pretty\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m meth(obj, \u001b[38;5;28mself\u001b[39m, cycle)\n\u001b[1;32m 409\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 410\u001b[0m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mobject\u001b[39m\n\u001b[1;32m 411\u001b[0m \u001b[38;5;66;03m# check if cls defines __repr__\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 417\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(_safe_getattr(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__repr__\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[1;32m 418\u001b[0m ):\n\u001b[0;32m--> 419\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_repr_pprint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcycle\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _default_pprint(obj, \u001b[38;5;28mself\u001b[39m, cycle)\n\u001b[1;32m 422\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n",
"File \u001b[0;32m~/main-venv/lib/python3.11/site-packages/IPython/lib/pretty.py:787\u001b[0m, in \u001b[0;36m_repr_pprint\u001b[0;34m(obj, p, cycle)\u001b[0m\n\u001b[1;32m 785\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"A pprint that just redirects to the normal repr function.\"\"\"\u001b[39;00m\n\u001b[1;32m 786\u001b[0m \u001b[38;5;66;03m# Find newlines and replace them with p.break_()\u001b[39;00m\n\u001b[0;32m--> 787\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mrepr\u001b[39m(obj)\n\u001b[1;32m 788\u001b[0m lines \u001b[38;5;241m=\u001b[39m output\u001b[38;5;241m.\u001b[39msplitlines()\n\u001b[1;32m 789\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m p\u001b[38;5;241m.\u001b[39mgroup():\n",
"File \u001b[0;32mcupy/_core/core.pyx:1742\u001b[0m, in \u001b[0;36mcupy._core.core._ndarray_base.__repr__\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mcupy/_core/core.pyx:1839\u001b[0m, in \u001b[0;36mcupy._core.core._ndarray_base.get\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mcupy/_core/core.pyx:2680\u001b[0m, in \u001b[0;36mcupy._core.core._internal_ascontiguousarray\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mcupy/_core/_kernel.pyx:1375\u001b[0m, in \u001b[0;36mcupy._core._kernel.ufunc.__call__\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mcupy/_core/_kernel.pyx:1402\u001b[0m, in \u001b[0;36mcupy._core._kernel.ufunc._get_ufunc_kernel\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mcupy/_core/_kernel.pyx:1082\u001b[0m, in \u001b[0;36mcupy._core._kernel._get_ufunc_kernel\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mcupy/_core/_kernel.pyx:94\u001b[0m, in \u001b[0;36mcupy._core._kernel._get_simple_elementwise_kernel\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mcupy/_core/_kernel.pyx:82\u001b[0m, in \u001b[0;36mcupy._core._kernel._get_simple_elementwise_kernel_from_code\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mcupy/_core/core.pyx:2258\u001b[0m, in \u001b[0;36mcupy._core.core.compile_with_cache\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m~/main-venv/lib/python3.11/site-packages/cupy/cuda/compiler.py:484\u001b[0m, in \u001b[0;36m_compile_module_with_cache\u001b[0;34m(source, options, arch, cache_dir, extra_source, backend, enable_cooperative_groups, name_expressions, log_stream, jitify)\u001b[0m\n\u001b[1;32m 480\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _compile_with_cache_hip(\n\u001b[1;32m 481\u001b[0m source, options, arch, cache_dir, extra_source, backend,\n\u001b[1;32m 482\u001b[0m name_expressions, log_stream, cache_in_memory)\n\u001b[1;32m 483\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 484\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_compile_with_cache_cuda\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 485\u001b[0m \u001b[43m \u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43march\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_source\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbackend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 486\u001b[0m \u001b[43m \u001b[49m\u001b[43menable_cooperative_groups\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname_expressions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlog_stream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 487\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_in_memory\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjitify\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/main-venv/lib/python3.11/site-packages/cupy/cuda/compiler.py:499\u001b[0m, in \u001b[0;36m_compile_with_cache_cuda\u001b[0;34m(source, options, arch, cache_dir, extra_source, backend, enable_cooperative_groups, name_expressions, log_stream, cache_in_memory, jitify)\u001b[0m\n\u001b[1;32m 497\u001b[0m cache_dir \u001b[38;5;241m=\u001b[39m get_cache_dir()\n\u001b[1;32m 498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m arch \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 499\u001b[0m arch \u001b[38;5;241m=\u001b[39m \u001b[43m_get_arch\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 501\u001b[0m options \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m-ftz=true\u001b[39m\u001b[38;5;124m'\u001b[39m,)\n\u001b[1;32m 503\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m enable_cooperative_groups:\n\u001b[1;32m 504\u001b[0m \u001b[38;5;66;03m# `cooperative_groups` requires relocatable device code.\u001b[39;00m\n",
"File \u001b[0;32mcupy/_util.pyx:64\u001b[0m, in \u001b[0;36mcupy._util.memoize.decorator.ret\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m~/main-venv/lib/python3.11/site-packages/cupy/cuda/compiler.py:148\u001b[0m, in \u001b[0;36m_get_arch\u001b[0;34m()\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[38;5;129m@_util\u001b[39m\u001b[38;5;241m.\u001b[39mmemoize(for_each_device\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_get_arch\u001b[39m():\n\u001b[1;32m 146\u001b[0m \u001b[38;5;66;03m# See Supported Compile Options section of NVRTC User Guide for\u001b[39;00m\n\u001b[1;32m 147\u001b[0m \u001b[38;5;66;03m# the maximum value allowed for `--gpu-architecture`.\u001b[39;00m\n\u001b[0;32m--> 148\u001b[0m nvrtc_max_compute_capability \u001b[38;5;241m=\u001b[39m \u001b[43m_get_max_compute_capability\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 150\u001b[0m arch \u001b[38;5;241m=\u001b[39m device\u001b[38;5;241m.\u001b[39mDevice()\u001b[38;5;241m.\u001b[39mcompute_capability\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m arch \u001b[38;5;129;01min\u001b[39;00m _tegra_archs:\n",
"File \u001b[0;32mcupy/_util.pyx:64\u001b[0m, in \u001b[0;36mcupy._util.memoize.decorator.ret\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m~/main-venv/lib/python3.11/site-packages/cupy/cuda/compiler.py:126\u001b[0m, in \u001b[0;36m_get_max_compute_capability\u001b[0;34m()\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;129m@_util\u001b[39m\u001b[38;5;241m.\u001b[39mmemoize()\n\u001b[1;32m 125\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_get_max_compute_capability\u001b[39m():\n\u001b[0;32m--> 126\u001b[0m major, minor \u001b[38;5;241m=\u001b[39m \u001b[43m_get_nvrtc_version\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m major \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m11\u001b[39m:\n\u001b[1;32m 128\u001b[0m \u001b[38;5;66;03m# CUDA 10.2\u001b[39;00m\n\u001b[1;32m 129\u001b[0m nvrtc_max_compute_capability \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m75\u001b[39m\u001b[38;5;124m'\u001b[39m\n",
"File \u001b[0;32m~/main-venv/lib/python3.11/site-packages/cupy/cuda/compiler.py:115\u001b[0m, in \u001b[0;36m_get_nvrtc_version\u001b[0;34m()\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28;01mglobal\u001b[39;00m _nvrtc_version\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _nvrtc_version \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 115\u001b[0m _nvrtc_version \u001b[38;5;241m=\u001b[39m \u001b[43mnvrtc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetVersion\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 117\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _nvrtc_version\n",
"File \u001b[0;32mcupy_backends/cuda/libs/nvrtc.pyx:56\u001b[0m, in \u001b[0;36mcupy_backends.cuda.libs.nvrtc.getVersion\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mcupy_backends/cuda/libs/nvrtc.pyx:57\u001b[0m, in \u001b[0;36mcupy_backends.cuda.libs.nvrtc.getVersion\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mcupy_backends/cuda/libs/_cnvrtc.pxi:72\u001b[0m, in \u001b[0;36mcupy_backends.cuda.libs.nvrtc.initialize\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mcupy_backends/cuda/libs/_cnvrtc.pxi:76\u001b[0m, in \u001b[0;36mcupy_backends.cuda.libs.nvrtc._initialize\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mcupy_backends/cuda/libs/_cnvrtc.pxi:143\u001b[0m, in \u001b[0;36mcupy_backends.cuda.libs.nvrtc._get_softlink\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mcupy_backends/cuda/_softlink.pyx:32\u001b[0m, in \u001b[0;36mcupy_backends.cuda._softlink.SoftLink.__init__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mRuntimeError\u001b[0m: CuPy failed to load libnvrtc.so.12: OSError: libnvrtc.so.12: cannot open shared object file: No such file or directory"
]
}
],
"source": [
"X_train = cudf.DataFrame.from_pandas(df_imp).to_cupy()\n",
"X_train"
]
}
],
"metadata": {

View file

@ -1,8 +1,11 @@
from fit_models_CV import XGBfit_CV_score
from fit_models_CV import XGBfit_GridS_CV, XGBfit_CV_bayesian
from utils import preproc_2
import pandas as pd
import numpy as np
import cupy as cp
import os
import time
import pickle
main_df = pd.read_csv(os.path.join("..", "data", "final_merge.csv"), sep =";", decimal= ".")
main_df[['Sex', 'Sweetener']] = main_df[['Sex', 'Sweetener']].astype("category")
@ -19,18 +22,17 @@ def testing_funct(df, dir_name, targets, features, model_func):
os.mkdir(directory)
X_train, X_test, y_train, y_test = preproc_2(df=df, targets=targets, features=features)
cv_scores = model_func(X_train, X_test, y_train, y_test)
with open(os.path.join(directory,"prueba_"+dir_name+"_modelo.pkl"), "wb") as f:
pickle.dump(cv_scores, f, protocol = 5)
print('Mejor score:', study.best_value)
r2_scores = cv_scores_prueba["test_r2"]
MAE_scores = np.abs(cv_scores_prueba["test_neg_mean_absolute_error"])
RMSE_scores = np.abs(cv_scores_prueba["test_neg_root_mean_squared_error"])
r2_scores = cv_scores["test_r2"]
MAE_scores = np.abs(cv_scores["test_neg_mean_absolute_error"])
RMSE_scores = np.abs(cv_scores["test_neg_root_mean_squared_error"])
print(r2_scores)
print('R2 scores: %.3f (%.3f)' % (np.mean(r2_scores), np.std(r2_scores)))
@ -45,7 +47,7 @@ study_features = ["Sex", "Sweetener"]
targets = ['VA.GG_plasm_post']
features = ['VA_plasm_pre','VA.GG_plasm_pre', 'VA.S_pre', 'VA.GS_plasm_pre', 'VA.SS_plasm_pre', 'Total.VA_plasm_pre'] + study_features
testing_funct (df = main_df, dir_name = "VA-GG_case", targets=targets, features=features, model_func= XGBfit_CV_score)
testing_funct (df = main_df, dir_name = "VA-GG_case", targets=targets, features=features, model_func= XGBfit_CV_bayesian)
# N-G case
@ -53,6 +55,6 @@ testing_funct (df = main_df, dir_name = "VA-GG_case", targets=targets, features=
# Bind `targets` (plural): that is the name passed to testing_funct below.
# Assigning to `target` left the previous case's target list in effect, so
# the N-G run was silently fitted against the wrong column.
targets = ['N.G_urine_post']
features = ['N.G_plasm_pre', 'N_pre','N.G_urine_pre', 'N.GG_pre', 'N.S_pre', 'Total.N_pre'] + study_features
testing_funct (df = main_df, dir_name = "N-G_case", targets=targets, features=features, model_func= XGBfit_CV_score)
testing_funct (df = main_df, dir_name = "N-G_case", targets=targets, features=features, model_func= XGBfit_CV_bayesian)

View file

@ -21,7 +21,7 @@ def preproc_2 (df, features, targets):
not_imput = df.columns.drop(df[featplustargets].select_dtypes(include=np.number).columns)
iimp = IterativeImputer(
estimator = XGBRegressor(),
estimator = XGBRegressor(device = "cpu"),
random_state = 42,
verbose = 0,
)
@ -29,7 +29,7 @@ def preproc_2 (df, features, targets):
iimp.set_output(transform="pandas")
df_imp = iimp.fit_transform(to_imput)
df_imp[not_imput] = df[not_imput]
#df_imp[not_imput] = df[not_imput]
X_train, X_test, y_train, y_test = train_test_split(
df[features],