From 17ca0e7f577841cc7c4032f2337855988ebea067 Mon Sep 17 00:00:00 2001 From: Sachin Duhan <38427564+sachin-duhan@users.noreply.github.com> Date: Mon, 9 Oct 2023 00:20:32 +0530 Subject: [PATCH 1/5] code quality fix --- .github/media/workflows/quality.yaml | 33 +++++++++++++ .pre-commit-config.yaml | 53 +++++++++++++++++++++ bot.py | 69 ++++++++++++++++------------ chains.py | 11 +++-- loader.py | 3 +- pdf_bot.py | 12 ++--- utils.py | 9 ++-- 7 files changed, 140 insertions(+), 50 deletions(-) create mode 100644 .github/media/workflows/quality.yaml create mode 100644 .pre-commit-config.yaml diff --git a/.github/media/workflows/quality.yaml b/.github/media/workflows/quality.yaml new file mode 100644 index 00000000..3cff5a5b --- /dev/null +++ b/.github/media/workflows/quality.yaml @@ -0,0 +1,33 @@ +name: + +on: push + +jobs: + black: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.9' + - name: Install deps + uses: knowsuchagency/poetry-install@v1 + env: + POETRY_VIRTUALENVS_CREATE: false + - name: Run black check + run: python3 -m black --check . + flake8: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.9' + - name: Install deps + uses: knowsuchagency/poetry-install@v1 + env: + POETRY_VIRTUALENVS_CREATE: false + - name: Run flake8 check + run: python3 -m flake8 --count . 
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..0c530150 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,53 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.4.0 + hooks: + - id: check-ast + - id: trailing-whitespace + - id: check-toml + - id: end-of-file-fixer + +- repo: https://github.com/asottile/add-trailing-comma + rev: v2.1.0 + hooks: + - id: add-trailing-comma + +- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks + rev: v2.1.0 + hooks: + - id: pretty-format-yaml + args: + - --autofix + - --preserve-quotes + - --indent=2 + +- repo: local + hooks: + - id: autoflake + name: autoflake + entry: poetry run autoflake + language: system + types: [python] + args: [--in-place, --remove-all-unused-imports, --remove-duplicate-keys] + + - id: black + name: Format with Black + entry: poetry run black + language: system + types: [python] + + - id: isort + name: isort + entry: poetry run isort + language: system + types: [python] + + - id: flake8 + name: Check with Flake8 + entry: poetry run flake8 + language: system + pass_filenames: false + types: [python] + args: [--count, .] 
diff --git a/bot.py b/bot.py index 2290602a..397b2785 100644 --- a/bot.py +++ b/bot.py @@ -83,44 +83,52 @@ def on_llm_new_token(self, token: str, **kwargs) -> None: def chat_input(): - user_input = st.chat_input("What coding issue can I help you resolve today?") - - if user_input: - with st.chat_message("user"): - st.write(user_input) - with st.chat_message("assistant"): - st.caption(f"RAG: {name}") - stream_handler = StreamHandler(st.empty()) - result = output_function( - {"question": user_input, "chat_history": []}, callbacks=[stream_handler] - )["answer"] - output = result - st.session_state[f"user_input"].append(user_input) - st.session_state[f"generated"].append(output) - st.session_state[f"rag_mode"].append(name) + if not ( + user_input := st.chat_input( + "What coding issue can I help you resolve today?" + ) + ): + return + with st.chat_message("user"): + st.write(user_input) + with st.chat_message("assistant"): + _extracted_from_chat_input_(user_input) + + +# TODO Rename this here and in `chat_input` +def _extracted_from_chat_input_(user_input): + st.caption(f"RAG: {name}") + stream_handler = StreamHandler(st.empty()) + result = output_function( + {"question": user_input, "chat_history": []}, callbacks=[stream_handler] + )["answer"] + output = result + st.session_state["user_input"].append(user_input) + st.session_state["generated"].append(output) + st.session_state["rag_mode"].append(name) def display_chat(): # Session state if "generated" not in st.session_state: - st.session_state[f"generated"] = [] + st.session_state["generated"] = [] if "user_input" not in st.session_state: - st.session_state[f"user_input"] = [] + st.session_state["user_input"] = [] if "rag_mode" not in st.session_state: - st.session_state[f"rag_mode"] = [] + st.session_state["rag_mode"] = [] - if st.session_state[f"generated"]: - size = len(st.session_state[f"generated"]) + if st.session_state["generated"]: + size = len(st.session_state["generated"]) # Display only the last three 
exchanges for i in range(max(size - 3, 0), size): with st.chat_message("user"): - st.write(st.session_state[f"user_input"][i]) + st.write(st.session_state["user_input"][i]) with st.chat_message("assistant"): - st.caption(f"RAG: {st.session_state[f'rag_mode'][i]}") - st.write(st.session_state[f"generated"][i]) + st.caption(f"RAG: {st.session_state['rag_mode'][i]}") + st.write(st.session_state["generated"][i]) with st.expander("Not finding what you're looking for?"): st.write( @@ -142,9 +150,9 @@ def mode_select() -> str: name = mode_select() -if name == "LLM only" or name == "Disabled": +if name in ["LLM only", "Disabled"]: output_function = llm_chain -elif name == "Vector + Graph" or name == "Enabled": +elif name in ["Vector + Graph", "Enabled"]: output_function = rag_chain @@ -153,9 +161,10 @@ def generate_ticket(): records = neo4j_graph.query( "MATCH (q:Question) RETURN q.title AS title, q.body AS body ORDER BY q.score DESC LIMIT 3" ) - questions = [] - for i, question in enumerate(records, start=1): - questions.append((question["title"], question["body"])) + questions = [ + (question["title"], question["body"]) + for i, question in enumerate(records, start=1) + ] # Ask LLM to generate new question in the same style questions_prompt = "" for i, question in enumerate(questions, start=1): @@ -182,7 +191,7 @@ def generate_ticket(): system_prompt = SystemMessagePromptTemplate.from_template( gen_system_template, template_format="jinja2" ) - q_prompt = st.session_state[f"user_input"][-1] + q_prompt = st.session_state["user_input"][-1] chat_prompt = ChatPromptTemplate.from_messages( [ system_prompt, @@ -215,7 +224,7 @@ def close_sidebar(): st.session_state.open_sidebar = False -if not "open_sidebar" in st.session_state: +if "open_sidebar" not in st.session_state: st.session_state.open_sidebar = False if st.session_state.open_sidebar: new_title, new_question = generate_ticket() diff --git a/chains.py b/chains.py index bcfa70c1..f83b0b79 100644 --- a/chains.py +++ 
b/chains.py @@ -13,7 +13,9 @@ from utils import BaseLogger -def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config={}): +def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config=None): + if config is None: + config = {} if embedding_model_name == "ollama": embeddings = OllamaEmbeddings( base_url=config["ollama_base_url"], model="llama2" @@ -33,7 +35,9 @@ def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config= return embeddings, dimension -def load_llm(llm_name: str, logger=BaseLogger(), config={}): +def load_llm(llm_name: str, logger=BaseLogger(), config=None): + if config is None: + config = {} if llm_name == "gpt-4": logger.info("LLM: Using GPT-4") return ChatOpenAI(temperature=0, model_name="gpt-4", streaming=True) @@ -140,10 +144,9 @@ def configure_qa_rag_chain(llm, embeddings, embeddings_store_url, username, pass """, ) - kg_qa = RetrievalQAWithSourcesChain( + return RetrievalQAWithSourcesChain( combine_documents_chain=qa_chain, retriever=kg.as_retriever(search_kwargs={"k": 2}), reduce_k_below_max_tokens=False, max_tokens_limit=3375, ) - return kg_qa diff --git a/loader.py b/loader.py index 8cc08023..c047448f 100644 --- a/loader.py +++ b/loader.py @@ -97,10 +97,9 @@ def insert_so_data(data: dict) -> None: # Streamlit def get_tag() -> str: - input_text = st.text_input( + return st.text_input( "Which tag questions do you want to import?", value="neo4j" ) - return input_text def get_pages(): diff --git a/pdf_bot.py b/pdf_bot.py index fde7772b..979a96ef 100644 --- a/pdf_bot.py +++ b/pdf_bot.py @@ -57,10 +57,7 @@ def main(): if pdf is not None: pdf_reader = PdfReader(pdf) - text = "" - for page in pdf_reader.pages: - text += page.extract_text() - + text = "".join(page.extract_text() for page in pdf_reader.pages) # langchain_textspliter text_splitter = RecursiveCharacterTextSplitter( chunk_size=1000, chunk_overlap=200, length_function=len @@ -83,10 +80,9 @@ def main(): llm=llm, 
chain_type="stuff", retriever=vectorstore.as_retriever() ) - # Accept user questions/query - query = st.text_input("Ask questions about related your upload pdf file") - - if query: + if query := st.text_input( + "Ask questions about related your upload pdf file" + ): stream_handler = StreamHandler(st.empty()) qa.run(query, callbacks=[stream_handler]) diff --git a/utils.py b/utils.py index 9404f154..ff521e8d 100644 --- a/utils.py +++ b/utils.py @@ -1,3 +1,4 @@ +import contextlib class BaseLogger: def __init__(self) -> None: self.info = print @@ -28,15 +29,11 @@ def extract_title_and_question(input_string): def create_vector_index(driver, dimension: int) -> None: index_query = "CALL db.index.vector.createNodeIndex('stackoverflow', 'Question', 'embedding', $dimension, 'cosine')" - try: + with contextlib.suppress(Exception): driver.query(index_query, {"dimension": dimension}) - except: # Already exists - pass index_query = "CALL db.index.vector.createNodeIndex('top_answers', 'Answer', 'embedding', $dimension, 'cosine')" - try: + with contextlib.suppress(Exception): driver.query(index_query, {"dimension": dimension}) - except: # Already exists - pass def create_constraints(driver): From a00970fd63898efe4894e75b00f356b4c8f8affa Mon Sep 17 00:00:00 2001 From: Sachin Duhan <38427564+sachin-duhan@users.noreply.github.com> Date: Mon, 9 Oct 2023 00:21:11 +0530 Subject: [PATCH 2/5] move: workflow folder to .github --- .github/{media => }/workflows/quality.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{media => }/workflows/quality.yaml (100%) diff --git a/.github/media/workflows/quality.yaml b/.github/workflows/quality.yaml similarity index 100% rename from .github/media/workflows/quality.yaml rename to .github/workflows/quality.yaml From fdaff10eaf28ad8449514674559ba300a5fc3a0d Mon Sep 17 00:00:00 2001 From: Sachin Duhan <38427564+sachin-duhan@users.noreply.github.com> Date: Mon, 9 Oct 2023 00:30:29 +0530 Subject: [PATCH 3/5] code quality patch --- 
.github/workflows/quality.yaml | 14 ++-- .gitignore | 146 +++++++++++++++++++++++++++++++++ requirements.txt | 4 +- 3 files changed, 154 insertions(+), 10 deletions(-) diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index 3cff5a5b..15903f7a 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -1,4 +1,4 @@ -name: +name: on: push @@ -11,10 +11,8 @@ jobs: uses: actions/setup-python@v2 with: python-version: '3.9' - - name: Install deps - uses: knowsuchagency/poetry-install@v1 - env: - POETRY_VIRTUALENVS_CREATE: false + - name: Install dependencies + run: pip install -r requirements.txt - name: Run black check run: python3 -m black --check . flake8: @@ -25,9 +23,7 @@ jobs: uses: actions/setup-python@v2 with: python-version: '3.9' - - name: Install deps - uses: knowsuchagency/poetry-install@v1 - env: - POETRY_VIRTUALENVS_CREATE: false + - name: Install dependencies + run: pip install -r requirements.txt - name: Run flake8 check run: python3 -m flake8 --count . diff --git a/.gitignore b/.gitignore index 93ecc0ff..0fcdd970 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,149 @@ data/ embedding_model/* !embedding_model/.ignore .DS_Store + +### Python template + +.idea/ +.vscode/ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +*.sqlite3 +*.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# secret config files +*.secret.* diff --git a/requirements.txt b/requirements.txt index 397aafe4..3fface59 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,6 @@ streamlit sentence_transformers==2.2.2 Pillow PyPDF2 -torch==2.0.1 \ No newline at end of file +torch==2.0.1 +black +flake8 From b2ab89fd53bc47a959cc89c80e217f7262eaed31 Mon Sep 17 00:00:00 2001 From: Sachin Duhan <38427564+sachin-duhan@users.noreply.github.com> Date: Mon, 9 Oct 2023 00:33:28 +0530 Subject: [PATCH 4/5] workflow installation perf boost --- .github/workflows/quality.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index 15903f7a..f99e3290 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -12,7 +12,7 @@ jobs: with: python-version: '3.9' - name: Install dependencies - run: pip install -r requirements.txt + run: pip install -r black - name: Run black check run: python3 -m black --check . flake8: @@ -24,6 +24,6 @@ jobs: with: python-version: '3.9' - name: Install dependencies - run: pip install -r requirements.txt + run: pip install flake8 - name: Run flake8 check run: python3 -m flake8 --count . 
From 59b119ba884b411e91303943775bcfa5b16ea2c5 Mon Sep 17 00:00:00 2001 From: Sachin Duhan <38427564+sachin-duhan@users.noreply.github.com> Date: Mon, 9 Oct 2023 00:35:01 +0530 Subject: [PATCH 5/5] code quality updates; --- .github/workflows/quality.yaml | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 .github/workflows/quality.yaml diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml deleted file mode 100644 index f99e3290..00000000 --- a/.github/workflows/quality.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: - -on: push - -jobs: - black: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.9' - - name: Install dependencies - run: pip install -r black - - name: Run black check - run: python3 -m black --check . - flake8: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.9' - - name: Install dependencies - run: pip install flake8 - - name: Run flake8 check - run: python3 -m flake8 --count .