Actualizar archivos y añadir imágenes
This commit is contained in:
parent
3451a55365
commit
5248ed6084
File diff suppressed because it is too large
Load Diff
Binary file not shown.
After Width: | Height: | Size: 168 KiB |
Binary file not shown.
After Width: | Height: | Size: 118 KiB |
Binary file not shown.
After Width: | Height: | Size: 120 KiB |
Binary file not shown.
After Width: | Height: | Size: 90 KiB |
Binary file not shown.
After Width: | Height: | Size: 100 KiB |
|
@ -1,43 +1,59 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
import utilerias as ut
|
||||
|
||||
|
||||
def formatea_fecha(data):
|
||||
data.loc[:, "fecha_completa"] = data.loc[:, "fecha"] + \
|
||||
" " + data.loc[:, "hora_recepcion"]
|
||||
data.loc[:, "fecha_completa"] = (
|
||||
data.loc[:, "fecha"] + " " + data.loc[:, "hora_recepcion"]
|
||||
)
|
||||
|
||||
data.loc[:, "fecha_completa"] = pd.to_datetime(
|
||||
data.loc[:, "fecha_completa"], format="%d/%m/%y %H:%M:%S")
|
||||
data.loc[:, "fecha_completa"], format="%d/%m/%y %H:%M:%S"
|
||||
)
|
||||
|
||||
|
||||
def limpia_descripcion_cierre(data):
|
||||
renombre_de_valores = {"Cierre De Fuga": "Otros",
|
||||
"Fuera De Entidad Federativa": "Otros",
|
||||
"Fuera De Subzona": "Otros",
|
||||
"Remitido Al M.p.": "Hecho real",
|
||||
"Resolucion 1": "Hecho real",
|
||||
"Resolucion 5": "Hecho real"}
|
||||
data.loc[:, "descripcion_cierre"] = data.loc[:,
|
||||
"descripcion_cierre"].replace(renombre_de_valores)
|
||||
renombre_de_valores = {
|
||||
"Cierre De Fuga": "Otros",
|
||||
"Fuera De Entidad Federativa": "Otros",
|
||||
"Fuera De Subzona": "Otros",
|
||||
"Remitido Al M.p.": "Hecho real",
|
||||
"Resolucion 1": "Hecho real",
|
||||
"Resolucion 5": "Hecho real",
|
||||
}
|
||||
data.loc[:, "descripcion_cierre"] = data.loc[:, "descripcion_cierre"].replace(
|
||||
renombre_de_valores
|
||||
)
|
||||
|
||||
|
||||
def limpia_via_recepcion(data):
|
||||
renombre_de_valores = {"060 Monterrey": "Otros",
|
||||
"80": "Otros",
|
||||
"89": "Otros",
|
||||
"App": "Otros",
|
||||
"App 9-1-1": "Otros",
|
||||
"Cic": "Otros",
|
||||
"Rutina": "Gobierno",
|
||||
"Sala crisis": "Gobierno",
|
||||
"Telefonia movil": "Telefono",
|
||||
"Videovigilancia": "Gobierno",
|
||||
"Whats App": "Otros"}
|
||||
data.loc[:, "via_recepcion"] = data.loc[:,
|
||||
"via_recepcion"].replace(renombre_de_valores)
|
||||
renombre_de_valores = {
|
||||
"060 Monterrey": "Otros",
|
||||
"80": "Otros",
|
||||
"89": "Otros",
|
||||
"App": "Otros",
|
||||
"App 9-1-1": "Otros",
|
||||
"Cic": "Otros",
|
||||
"Rutina": "Gobierno",
|
||||
"Sala crisis": "Gobierno",
|
||||
"Telefonia movil": "Telefono",
|
||||
"Videovigilancia": "Gobierno",
|
||||
"Whats App": "Otros",
|
||||
}
|
||||
data.loc[:, "via_recepcion"] = data.loc[:, "via_recepcion"].replace(
|
||||
renombre_de_valores
|
||||
)
|
||||
|
||||
|
||||
def guarda_en_csv(data):
|
||||
nombre_de_archivo = "data/preprocessed_dvgm.csv"
|
||||
data_path = ut.abs_path(nombre_de_archivo)
|
||||
data.to_csv(data_path)
|
||||
|
||||
|
||||
def run(data):
|
||||
formatea_fecha(data)
|
||||
limpia_descripcion_cierre(data)
|
||||
limpia_via_recepcion(data)
|
||||
guarda_en_csv(data)
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
import statsmodels.api as sm
|
||||
import matplotlib.pyplot as plt
|
||||
import preprocesar_datos as p_datos
|
||||
import utilerias as ut
|
||||
|
||||
|
@ -7,27 +9,269 @@ import utilerias as ut
|
|||
class SerieDeTiempo:
|
||||
"""Esta clase gestiona la serie de tiempo."""
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, variable=None, freq=30, alpha=0.05, steps=30, savefig=False):
|
||||
self.iniciar_variables(
|
||||
variable=variable, freq=freq, alpha=alpha, steps=steps, savefig=savefig
|
||||
)
|
||||
self.carga_datos()
|
||||
self.definir_variables()
|
||||
self.preprocesar_datos()
|
||||
self.plot_timeseries()
|
||||
self.decompose_timeseries()
|
||||
self.determine_arima_params()
|
||||
self.fit_arima()
|
||||
self.validate_forecast()
|
||||
self.get_forecast()
|
||||
|
||||
def iniciar_variables(
|
||||
self, variable=None, freq=30, alpha=0.05, steps=30, savefig=False
|
||||
):
|
||||
"""This method saves init variables."""
|
||||
self.variable = None
|
||||
self.freq = freq
|
||||
self.alpha = alpha
|
||||
self.steps = steps
|
||||
self._savefig = savefig
|
||||
|
||||
def carga_datos(self):
|
||||
"""Carga los datos de los reportes al 911."""
|
||||
try:
|
||||
nombre_de_archivo = "data/preprocessed_dvgm.csv"
|
||||
data_path = ut.abs_path(nombre_de_archivo)
|
||||
self.data = pd.read_csv(data_path)
|
||||
self.data.loc[:, "fecha_completa"] = pd.to_datetime(
|
||||
self.data.loc[:, "fecha_completa"], format="%d/%m/%y %H:%M:%S"
|
||||
)
|
||||
except:
|
||||
nombre_de_archivo = "data/dvgm.csv"
|
||||
data_path = ut.abs_path(nombre_de_archivo)
|
||||
self.data = pd.read_csv(data_path)
|
||||
self.preprocesar_datos()
|
||||
|
||||
nombre_de_archivo = "data/dvgm.csv"
|
||||
data_path = ut.abs_path(nombre_de_archivo)
|
||||
self.data = pd.read_csv(data_path)
|
||||
def preprocesar_datos(self):
|
||||
"""Método que preprocesa los datos que lo requieren."""
|
||||
p_datos.run(self.data)
|
||||
# print(self.data.groupby("via_recepcion").count())
|
||||
|
||||
def definir_variables(self):
|
||||
"""Método que define variables necesarias respecto a los datos."""
|
||||
|
||||
self.atributos = self.data.columns.values
|
||||
|
||||
def preprocesar_datos(self):
|
||||
"""Método que preprocesa los datos que lo requieren."""
|
||||
p_datos.run(self.data)
|
||||
print(self.data.groupby("via_recepcion").count())
|
||||
if self.variable:
|
||||
self.valores_de_variable = self.data.loc[:, self.variable].unique()
|
||||
else:
|
||||
self.valores_de_variable = ["General"]
|
||||
self.generate_timeseries()
|
||||
|
||||
def generate_timeseries(self):
|
||||
"""This method generates the timeseries based on the data."""
|
||||
self.ts = {}
|
||||
for valor_de_variable in self.valores_de_variable:
|
||||
if self.variable:
|
||||
ts = (
|
||||
self.data.loc[self.data.loc[:, self.variable] == valor_de_variable]
|
||||
.groupby(self.data.loc[:, "fecha_completa"].dt.date)
|
||||
.count()
|
||||
.loc[:, "numero_reporte"]
|
||||
.asfreq("D")
|
||||
)
|
||||
else:
|
||||
ts = (
|
||||
self.data.groupby(self.data.loc[:, "fecha_completa"].dt.date)
|
||||
.count()
|
||||
.loc[:, "numero_reporte"]
|
||||
.asfreq("D")
|
||||
)
|
||||
date_min = ts.index.min()
|
||||
date_max = ts.index.max()
|
||||
idx = pd.date_range(date_min, date_max)
|
||||
ts = ts.reindex(idx)
|
||||
ts = ts.interpolate()
|
||||
self.ts[valor_de_variable] = ts.copy()
|
||||
|
||||
def savefig(self, fig_name="images/untitled.png"):
|
||||
"""This methos saves figures with a given name."""
|
||||
fig_path = ut.abs_path(fig_name)
|
||||
if self._savefig:
|
||||
plt.savefig(fig_path)
|
||||
else:
|
||||
plt.show()
|
||||
|
||||
def plot_timeseries(self):
|
||||
"""This method plots the timeseries for each country."""
|
||||
for valor_de_variable in self.valores_de_variable:
|
||||
self.ts[valor_de_variable].plot(figsize=(15, 5))
|
||||
plt.title(
|
||||
"Reportes al 911 - Serie de Tiempo - {}".format(valor_de_variable)
|
||||
)
|
||||
plt.xlabel("fecha")
|
||||
plt.ylabel("# de reportes")
|
||||
fig_name = "images/{}_timeseries.png".format(valor_de_variable)
|
||||
self.savefig(fig_name)
|
||||
|
||||
def decompose_timeseries(self):
|
||||
"""This method applies seasonal decomposition to timeseries."""
|
||||
self.decompose = {}
|
||||
for valor_de_variable in self.valores_de_variable:
|
||||
decomposition = sm.tsa.seasonal_decompose(
|
||||
self.ts[valor_de_variable], model="additive", period=self.freq
|
||||
)
|
||||
fig = decomposition.plot()
|
||||
fig.set_size_inches((15, 7))
|
||||
fig.suptitle(
|
||||
"Reportes al 911 - Serie de Tiempo - {} decomposition".format(
|
||||
valor_de_variable
|
||||
)
|
||||
)
|
||||
fig_name = "images/{}_decomposition_{}.png".format(
|
||||
valor_de_variable, self.freq
|
||||
)
|
||||
self.savefig(fig_name)
|
||||
|
||||
def grid_search(self, valor_de_variable, pdq, seasonal_pdq, display=False):
|
||||
"""This method implements the grid search."""
|
||||
best_aic = 99999.9
|
||||
best_pdq = (0, 0, 0)
|
||||
best_seasonal_pdq = (0, 0, 0, 0)
|
||||
for order in pdq:
|
||||
for seasonal_order in seasonal_pdq:
|
||||
try:
|
||||
mod = sm.tsa.statespace.SARIMAX(
|
||||
self.ts[valor_de_variable],
|
||||
order=order,
|
||||
seasonal_order=seasonal_order,
|
||||
enforce_stationarity=False,
|
||||
enforce_invertibility=False,
|
||||
)
|
||||
results = mod.fit(disp=False)
|
||||
|
||||
if display:
|
||||
print(
|
||||
"{} - ARIMA{}x{} - AIC: {:.2f}".format(
|
||||
valor_de_variable, order, seasonal_order, results.aic
|
||||
)
|
||||
)
|
||||
|
||||
if results.aic < best_aic:
|
||||
best_aic = results.aic
|
||||
best_pdq = order
|
||||
best_seasonal_pdq = seasonal_order
|
||||
except:
|
||||
continue
|
||||
return (best_pdq, best_seasonal_pdq, best_aic)
|
||||
|
||||
def determine_arima_params(self):
|
||||
"""This method determines ARIMA params based on a grid search."""
|
||||
p = d = q = range(0, 2)
|
||||
pdq = [(i, j, k) for i in p for j in d for k in q]
|
||||
seasonal_pdq = [(x[0], x[1], x[2], self.freq) for x in pdq]
|
||||
|
||||
self.pdq = {}
|
||||
self.seasonal_pdq = {}
|
||||
self.aic = {}
|
||||
|
||||
for valor_de_variable in self.valores_de_variable:
|
||||
print("\n\n------------------------------------")
|
||||
print("{} ARIMA Grid Search ...".format(valor_de_variable))
|
||||
print("------------------------------------\n\n")
|
||||
|
||||
(best_pdq, best_seasonal_pdq, best_aic) = self.grid_search(
|
||||
valor_de_variable, pdq, seasonal_pdq
|
||||
)
|
||||
|
||||
print("\nBest params for {} are:".format(valor_de_variable))
|
||||
print(
|
||||
"ARIMA{}x{} - AIC: {:.2f}".format(best_pdq, best_seasonal_pdq, best_aic)
|
||||
)
|
||||
self.pdq[valor_de_variable] = best_pdq
|
||||
self.seasonal_pdq[valor_de_variable] = best_seasonal_pdq
|
||||
self.aic[valor_de_variable] = best_aic
|
||||
|
||||
def fit_arima(self):
|
||||
"""This method fits the ARIMA model based on best parameters."""
|
||||
self.results = {}
|
||||
for valor_de_variable in self.valores_de_variable:
|
||||
mod = sm.tsa.statespace.SARIMAX(
|
||||
self.ts[valor_de_variable],
|
||||
order=self.pdq[valor_de_variable],
|
||||
seasonal_order=self.seasonal_pdq[valor_de_variable],
|
||||
enforce_stationarity=False,
|
||||
enforce_invertibility=False,
|
||||
)
|
||||
results = mod.fit(disp=False)
|
||||
print("\n\n------------------------------------")
|
||||
print("{} Timeseries ARIMA results".format(valor_de_variable))
|
||||
print("------------------------------------\n\n")
|
||||
print(results.summary().tables[1])
|
||||
results.plot_diagnostics(figsize=(15, 7))
|
||||
fig_name = "images/{}_residuals_{}.png".format(valor_de_variable, self.freq)
|
||||
self.savefig(fig_name)
|
||||
self.results[valor_de_variable] = results
|
||||
|
||||
def validate_forecast(self):
|
||||
"""This method validates the forecast and calculates RMSE."""
|
||||
for valor_de_variable in self.valores_de_variable:
|
||||
prediction = self.results[valor_de_variable].get_prediction(
|
||||
start="2022-11-01", dynamic=False
|
||||
)
|
||||
confidence_interval = prediction.conf_int(alpha=self.alpha)
|
||||
ax = self.ts[valor_de_variable].plot(label="Observed", figsize=(15, 7))
|
||||
prediction.predicted_mean.plot(
|
||||
ax=ax, label="One-step ahead Forecast", alpha=0.7
|
||||
)
|
||||
ax.fill_between(
|
||||
confidence_interval.index,
|
||||
confidence_interval.iloc[:, 0],
|
||||
confidence_interval.iloc[:, 1],
|
||||
color="k",
|
||||
alpha=0.2,
|
||||
)
|
||||
ax.set_title(
|
||||
"Reportes al 911 - Serie de Tiempo - {}".format(valor_de_variable)
|
||||
)
|
||||
ax.set_xlabel("Fecha")
|
||||
ax.set_ylabel("# de reportes")
|
||||
plt.legend()
|
||||
fig_name = "images/{}_prediction_{}.png".format(
|
||||
valor_de_variable, self.freq
|
||||
)
|
||||
self.savefig(fig_name)
|
||||
|
||||
observed = self.ts[valor_de_variable].loc["2022-11-01":]
|
||||
forecasted = prediction.predicted_mean
|
||||
mse = ((forecasted - observed) ** 2).mean()
|
||||
print("\n\n------------------------------------")
|
||||
print("{} Timeseries ARIMA results".format(valor_de_variable))
|
||||
print("------------------------------------\n\n")
|
||||
print("The Mean Squared Error (MSE) of the forecast is {:.2f}".format(mse))
|
||||
print(
|
||||
"The Root Mean Squared Error (RMSE) of the forecast is {:.2f}".format(
|
||||
np.sqrt(mse)
|
||||
)
|
||||
)
|
||||
|
||||
def get_forecast(self):
|
||||
"""This method does the forecast."""
|
||||
for valor_de_variable in self.valores_de_variable:
|
||||
forecast = self.results[valor_de_variable].get_forecast(steps=self.steps)
|
||||
confidence_interval = forecast.conf_int(alpha=self.alpha)
|
||||
ax = self.ts[valor_de_variable].plot(label="Observed", figsize=(15, 7))
|
||||
forecast.predicted_mean.plot(ax=ax, label="Forecast")
|
||||
ax.fill_between(
|
||||
confidence_interval.index,
|
||||
confidence_interval.iloc[:, 0],
|
||||
confidence_interval.iloc[:, 1],
|
||||
color="k",
|
||||
alpha=0.2,
|
||||
)
|
||||
ax.set_title(
|
||||
"Reportes al 911 - Serie de Tiempo - {}".format(valor_de_variable)
|
||||
)
|
||||
ax.set_xlabel("Fecha")
|
||||
ax.set_ylabel("# de reportes")
|
||||
plt.legend()
|
||||
fig_name = "images/{}_forecast_{}.png".format(valor_de_variable, self.freq)
|
||||
self.savefig(fig_name)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Reference in New Issue