# app.py
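# Streamlit app: validates a Gemini API key, then, for an uploaded CSV of model
# scores split into 'train' and 'oot' rows, reports the KS statistic, a confusion
# matrix with standard metrics at the optimal threshold, and a ventile (20-group)
# efficiency table, alongside a short Gemini-generated explanation of the KS test.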
import streamlit as st
import pandas as pd
import numpy as np
from scipy.stats import ks_2samp
from sklearn.metrics import roc_curve
from sklearn import metrics
import matplotlib.pyplot as plt
## GenAI
import google.generativeai as genai
import json
# Define your API key
API_KEY = "AIzaSyCLY-K449EXP04NAMu2XEugi29HWGYdMlY"
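# NOTE: the key is hardcoded for this demo; st.secrets or an environment variable
# would be a common alternative when deploying.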
# Function to initialize session state
def initialize_session_state():
    if 'api_key_correct' not in st.session_state:
        st.session_state.api_key_correct = False
# Main Streamlit app
def text_page():
    # Initialize session state
    initialize_session_state()
    # API key input
    api_key = st.sidebar.text_input("Enter your API key:", type="password")
    # Check if the API key is correct
    if api_key == API_KEY:
        st.session_state.api_key_correct = True
    else:
        if st.session_state.api_key_correct:
            st.sidebar.success("API key is correct!")
        else:
            st.sidebar.error("Incorrect API key. Please try again.")
            st.stop()
    # Configure the Generative AI API with the provided key
    genai.configure(api_key=API_KEY)
    # Manual model configuration options
    temperature = 0.9
    top_p = 1.0
    top_k = 1
    max_output_tokens = 2048
    # Set up the model configuration dictionary manually
    generation_config = {
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "max_output_tokens": max_output_tokens,
    }
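    # temperature controls randomness, top_p and top_k bound the candidate tokens
    # considered at each sampling step, and max_output_tokens caps the response length.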
    safety_settings = "{}"  # Placeholder for safety settings, can be modified as needed
    safety_settings = json.loads(safety_settings)
    # Initialize the generative model
    gemini = genai.GenerativeModel(
        model_name="gemini-pro",
        generation_config=generation_config,
        safety_settings=safety_settings
    )
    # KS function: compares the score distributions of class 0 and class 1
    def evaluate_ks(y_real, y_proba):
        df = pd.DataFrame({
            'real': y_real,
            'proba': y_proba
        })
        class0_proba = df.loc[df['real'] == 0, 'proba']
        class1_proba = df.loc[df['real'] == 1, 'proba']
        ks_result = ks_2samp(class0_proba, class1_proba)
        st.write(f"KS: {ks_result.statistic:.4f} (p-value: {ks_result.pvalue:.3e})")
        return ks_result.statistic
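    # The KS statistic is the maximum gap between the two classes' cumulative score
    # distributions; values closer to 1 indicate stronger separation between classes.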
    # Function to compute the optimal classification threshold
    def calcular_umbral_optimo(y_real, proba):
        fpr, tpr, thresholds = roc_curve(y_real, proba)
        diferencia = tpr - fpr
        optimal_idx = np.argmax(diferencia)
        optimal_threshold = thresholds[optimal_idx]
        return optimal_threshold
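    # The threshold returned is the ROC operating point that maximizes TPR - FPR
    # (Youden's J statistic); scores at or above it are labeled 1 below.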
    # Function to compute metrics and plot the confusion matrix
    def calcular_metricas_y_graficar(y_real, proba):
        umbral_optimo = calcular_umbral_optimo(y_real, proba)
        y_pred = np.where(proba >= umbral_optimo, 1, 0)
        confusion_matrix = metrics.confusion_matrix(y_real, y_pred)
        # Plot the confusion matrix
        cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=confusion_matrix, display_labels=[False, True])
        fig, ax = plt.subplots(figsize=(8, 6))
        cm_display.plot(ax=ax, cmap=plt.cm.YlGnBu)
        st.pyplot(fig)
        # Compute and display metrics
        Accuracy = metrics.accuracy_score(y_real, y_pred)
        Precision = metrics.precision_score(y_real, y_pred)
        Sensitivity_recall = metrics.recall_score(y_real, y_pred)
        Specificity = metrics.recall_score(y_real, y_pred, pos_label=0)
        F1_score = metrics.f1_score(y_real, y_pred)
        st.write(f"Umbral óptimo: {umbral_optimo:.4f}")
        st.write(f"Accuracy: {Accuracy:.4f}")
        st.write(f"Precision: {Precision:.4f}")
        st.write(f"Sensitivity/Recall: {Sensitivity_recall:.4f}")
        st.write(f"Specificity: {Specificity:.4f}")
        st.write(f"F1 Score: {F1_score:.4f}")
    # Function to compute ventiles (20 probability groups) and build the efficiency table
    def calcular_veintiles(df, y_real_col, prob_col):
        df = df.copy()  # work on a copy so the caller's DataFrame slice is not modified
        percentiles = [i / 20 for i in range(1, 20)]
        veintiles = df[prob_col].quantile(percentiles)
        def clasificar_veintil(valor, veintiles):
            for i, veintil in enumerate(veintiles):
                if valor <= veintil:
                    return i + 1
            return len(veintiles) + 1
        df['Veintil_prob'] = df[prob_col].apply(lambda x: clasificar_veintil(x, veintiles))
        df_0 = df[df[y_real_col] == 0]
        df_1 = df[df[y_real_col] == 1]
        counts_0 = df_0['Veintil_prob'].value_counts().sort_index()
        counts_1 = df_1['Veintil_prob'].value_counts().sort_index()
        veintil_df = pd.DataFrame({
            'Veintil_prob': range(1, 21),
            'N°Buenos': [counts_0.get(i, 0) for i in range(1, 21)],
            'N°Malos': [counts_1.get(i, 0) for i in range(1, 21)]
        }).set_index('Veintil_prob')
        veintil_df['Total_casos'] = veintil_df['N°Buenos'] + veintil_df['N°Malos']
        veintil_df['Buenos_acum'] = veintil_df['N°Buenos'].cumsum()
        veintil_df['Malos_acum'] = veintil_df['N°Malos'].cumsum()
        veintil_df['Total_acum'] = veintil_df['Total_casos'].cumsum()
        veintil_df['%Malo_Grupo'] = (veintil_df['N°Malos'] / veintil_df['Total_casos'] * 100).round(1).apply(lambda x: f"{x:.1f}%")
        veintil_df['%Buenos_acum'] = (veintil_df['Buenos_acum'] / veintil_df['N°Buenos'].sum() * 100).round(1).apply(lambda x: f"{x:.1f}%")
        veintil_df['%Malos_acum'] = (veintil_df['Malos_acum'] / veintil_df['N°Malos'].sum() * 100).round(1).apply(lambda x: f"{x:.1f}%")
        st.write(veintil_df)
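    # In the table, 'N°Buenos' counts class 0 (goods) and 'N°Malos' counts class 1
    # (bads) per ventile; the *_acum columns accumulate counts and percentages from
    # the lowest-probability group to the highest.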
    # Streamlit app layout
    st.title("Métricas IA")
    # Upload a CSV file
    uploaded_file = st.file_uploader("Sube un archivo CSV", type=["csv"])
    if uploaded_file is not None:
        # Read the CSV
        df = pd.read_csv(uploaded_file)
        st.write("Contenido del archivo CSV:")
        st.dataframe(df)
        # Select the columns for 'y_real' and 'proba'
        y_real_col = st.selectbox("Selecciona la columna Target:", df.columns)
        prob_col = st.selectbox("Selecciona la columna de probabilidades:", df.columns)
        filtro = st.selectbox("Selecciona la columna de filtro:", df.columns)
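        # The selected filter column is expected to contain the literal values
        # 'train' and 'oot'; rows with other values are ignored by the splits below.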
        # Run the analysis
        if st.button("Ejecutar análisis"):
            y_real_train = df[df[filtro] == 'train'][y_real_col]
            proba_train = df[df[filtro] == 'train'][prob_col]
            y_real_oot = df[df[filtro] == 'oot'][y_real_col]
            proba_oot = df[df[filtro] == 'oot'][prob_col]
            # KS
            st.subheader("Resultado del Test KS")
            prompt = '¿Qué es la prueba de Kolmogorov-Smirnov?'
            prompt_parts = [prompt]
            st.write(gemini.generate_content(prompt_parts).text)
            st.write("Train:")
            ks_stat_train = evaluate_ks(y_real_train, proba_train)
            st.write("OOT:")
            ks_stat_oot = evaluate_ks(y_real_oot, proba_oot)
            # Metrics and confusion matrix
            st.subheader("Matriz de Confusión y Métricas")
            st.write("Train:")
            calcular_metricas_y_graficar(y_real_train, proba_train)
            st.write("OOT:")
            calcular_metricas_y_graficar(y_real_oot, proba_oot)
            # Ventiles
            st.subheader("Tabla de Eficiencia")
            st.write("Train:")
            calcular_veintiles(df[df[filtro] == 'train'], y_real_col, prob_col)
            st.write("OOT:")
            calcular_veintiles(df[df[filtro] == 'oot'], y_real_col, prob_col)
# Call the main function
text_page()
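# To run locally (assuming streamlit, google-generativeai, pandas, numpy, scipy,
# scikit-learn, and matplotlib are installed):
#   streamlit run app.py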