|
| 1 | +# Copyright (c) Microsoft Corporation. |
| 2 | +# Licensed under the MIT License. |
| 3 | + |
| 4 | +from io import StringIO |
| 5 | + |
| 6 | +import streamlit as st |
| 7 | + |
| 8 | +from src.components.upload_files_component import upload_files |
| 9 | +from src.enums import PromptKeys |
| 10 | +from src.functions import GraphragAPI |
| 11 | + |
| 12 | + |
class IndexPipeline:
    """Streamlit UI for the indexing workflow.

    Renders three steps, each in the middle column of a three-column layout:
    choosing or uploading source data, submitting an index build job, and
    checking the status of an index.
    """

    # Annotation is quoted so it is not evaluated when the class is defined.
    def __init__(self, client: "GraphragAPI", column_widths: list[float]) -> None:
        """Store the API client and layout, and fetch the storage containers.

        Args:
            client: API wrapper used for every backend call made by the steps.
            column_widths: Widths handed to ``st.columns``; every step unpacks
                exactly three columns from it and draws in the middle one.
        """
        self.client = client
        self.containers = client.get_storage_container_names()
        self.column_widths = column_widths

    @staticmethod
    def _container_options(containers) -> list:
        """Return the selectbox options: a blank entry plus known containers.

        The blank entry is always present so that "nothing selected" stays
        representable even when *containers* is not a list.
        """
        known = containers if isinstance(containers, list) else []
        return [""] + known

    @staticmethod
    def _index_options(names) -> list:
        """Return *names* as a list, or ``[]`` when it holds no usable name."""
        if not isinstance(names, (list, tuple)) or not any(names):
            return []
        return list(names)

    @staticmethod
    def _as_percent(value):
        """Return *value* as a float, or ``None`` when it is not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    @staticmethod
    def _status_messages(payload) -> list:
        """Translate a status payload into ``(level, text)`` message pairs.

        ``level`` is ``"success"`` or ``"warning"``. The percent-complete
        message is omitted when the payload carries no numeric value for it.
        """
        messages = []

        job_status = payload.get("status")
        level = "success" if job_status in ("running", "complete") else "warning"
        messages.append((level, f"Status: {job_status}"))

        percent_complete = payload.get("percent_complete")
        percent = IndexPipeline._as_percent(percent_complete)
        if percent is not None:
            level = "warning" if percent < 100 else "success"
            messages.append((level, f"Percent Complete: {percent_complete}% "))

        progress_status = payload.get("progress") or "N/A"
        level = "success" if progress_status != "N/A" else "warning"
        messages.append((level, f"Progress: {progress_status}"))

        return messages

    def storage_data_step(self):
        """
        Builds the Storage Data Step for the Indexing Pipeline.

        Offers a selectbox of existing storage containers and an expander with
        the upload form. The upload form is disabled while an existing
        container is selected.
        """
        _, col2, _ = st.columns(self.column_widths)

        with col2:
            st.header(
                "1. Data Storage",
                divider=True,
                help="Select a Data Storage Container to upload data to or select an existing container to use for indexing. The data will be processed by the LLM to create a Knowledge Graph.",
            )
            select_storage_name = st.selectbox(
                label="Select an existing Storage Container.",
                options=self._container_options(self.containers),
                key="index-storage",
                index=0,
            )

            # Picking an existing container and uploading are mutually exclusive.
            disable_other_input = bool(select_storage_name)
            st.write("Or...")
            with st.expander("Upload data to a storage container."):
                # TODO: validate storage container name before uploading
                # TODO: add user message that option not available while existing storage container is selected
                upload_files(
                    self.client,
                    key_prefix="index",
                    disable_other_input=disable_other_input,
                )

    def build_index_step(self):
        """
        Creates the Build Index Step for the Indexing Pipeline.

        Reads the storage choice and generated prompts from
        ``st.session_state``, asks for an index name and, when the button is
        pressed, submits the job through ``self.client.build_index``.
        """
        _, col2, _ = st.columns(self.column_widths)
        with col2:
            st.header(
                "2. Build Index",
                divider=True,
                help="Building an index will process the data from step 1 and create a Knowledge Graph suitable for querying. The LLM will use either the default prompt configuration or the prompts that you generated previously. To track the status of an indexing job, use the check index status below.",
            )
            # use data from either the selected storage container or the uploaded data
            # (.get keeps this step usable when those widgets were not rendered yet)
            select_storage_name = st.session_state.get("index-storage")
            input_storage_name = (
                st.session_state.get("index-storage-name-input", "")
                if st.session_state.get("index-upload-button")
                else ""
            )
            storage_selection = select_storage_name or input_storage_name

            # Allow user to choose either default or custom prompts
            custom_prompts = any(st.session_state.get(k.value) for k in PromptKeys)
            prompt_options = ["Default", "Custom"] if custom_prompts else ["Default"]
            prompt_choice = st.radio(
                "Choose LLM Prompt Configuration",
                options=prompt_options,
                index=1 if custom_prompts else 0,
                key="prompt-config-choice",
                horizontal=True,
            )

            # Create new index name
            index_name = st.text_input("Enter Index Name", key="index-name-input")

            st.write(f"Selected Storage Container: **:blue[{storage_selection}]**")
            if st.button(
                "Build Index",
                help="You must enter both an Index Name and Select a Storage Container to enable this button",
                disabled=not index_name or not storage_selection,
            ):
                use_custom = prompt_choice == "Custom"

                def prompt_file(key):
                    # Custom prompts are passed as in-memory text files;
                    # None is passed when the default prompts were chosen.
                    if not use_custom:
                        return None
                    return StringIO(st.session_state.get(key.value))

                response = self.client.build_index(
                    storage_name=storage_selection,
                    index_name=index_name,
                    entity_extraction_prompt_filepath=prompt_file(PromptKeys.ENTITY),
                    summarize_description_prompt_filepath=prompt_file(
                        PromptKeys.SUMMARY
                    ),
                    community_prompt_filepath=prompt_file(PromptKeys.COMMUNITY),
                )

                if response.status_code == 200:
                    st.success(
                        f"Job submitted successfully, using {prompt_choice} prompts!"
                    )
                else:
                    st.error(
                        f"Failed to submit job.\nStatus: {response.json()['detail']}"
                    )

    def check_status_step(self):
        """
        Checks the progress of a running indexing job.

        Lists the known indexes, preselecting the name typed in the build
        step when it exists, and on request shows the job status, percent
        complete and progress text returned by the client.
        """
        _, col2, _ = st.columns(self.column_widths)
        with col2:
            st.header(
                "3. Check Index Status",
                divider=True,
                help="Select an index to check the status of what stage indexing is in. Indexing must be complete in order to be able to execute queries.",
            )

            options_indexes = self._index_options(self.client.get_index_names())
            # default to the index named in the build step if it exists
            new_index_name = st.session_state.get("index-name-input", "")
            default_index = (
                options_indexes.index(new_index_name)
                if new_index_name in options_indexes
                else 0
            )
            index_name_select = st.selectbox(
                label="Select an index to check its status.",
                options=options_indexes,
                index=default_index,
            )
            progress_bar = st.progress(0, text="Index Job Progress")
            if st.button("Check Status"):
                status_response = self.client.check_index_status(index_name_select)
                if status_response.status_code != 200:
                    st.warning(
                        f"No workflow information available for this index: {index_name_select}"
                    )
                else:
                    payload = status_response.json()
                    if not payload.get("status"):
                        st.warning(
                            f"No status information available for this index: {index_name_select}"
                        )
                    else:
                        percent = self._as_percent(payload.get("percent_complete"))
                        if percent is not None:
                            # Clamp so an out-of-range value cannot make the
                            # progress bar raise.
                            progress_bar.progress(min(max(percent / 100, 0.0), 1.0))

                        emit = {"success": st.success, "warning": st.warning}
                        for level, text in self._status_messages(payload):
                            emit[level](text)
0 commit comments