Commit 0c5c666

add 10_journal for the queryable daily journal system
1 parent 80feacc commit 0c5c666

2 files changed (+103, -0)

2 files changed

+103
-0
lines changed

10_journal.py (new file, +102 lines)
import argparse
import logging
import sys
from pathlib import Path

from dotenv import load_dotenv
from llama_index import (
    GPTVectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)

# to see token counter and token usage for the LLM and Embedding
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

load_dotenv()

OBSIDIAN_DIR = "/home/samuel/vaults/fragments/journals"


def read_journal_md(file_path):
    """Extract the pipe-delimited journal rows from one markdown file."""
    import re

    import markdown
    from bs4 import BeautifulSoup

    with open(file_path, "r") as f:
        text = f.read()
    html = markdown.markdown(text)
    soup = BeautifulSoup(html, "html.parser")

    # keep only the <p> tags that contain at least two `|` characters
    # (pipe-delimited journal rows)
    ps = [p for p in soup.find_all("p") if p.text.count("|") > 1]
    if not ps:
        raise ValueError(f"No pipe-delimited journal rows found in {file_path}")

    # strip everything up to and including the first `|` character
    result = re.sub(r"^[^|]*\|", "", ps[0].text)

    print(f"Finished processing {file_path}")
    return result


def create_journal_nodes(dir_path):
    """
    Examples: https://gpt-index.readthedocs.io/en/stable/guides/primer/usage_pattern.html
    """
    from llama_index import Document
    from llama_index.node_parser import SimpleNodeParser

    docs = []
    parser = SimpleNodeParser()

    # loop through each markdown file in the directory
    for file_path in Path(dir_path).glob("*.md"):
        md = read_journal_md(file_path)
        # construct documents manually using the lower-level Document struct
        docs.append(Document(md))

    nodes = parser.get_nodes_from_documents(docs)
    return nodes, docs


if Path("./storage").exists():
    # reuse the index persisted by a previous run
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)
else:
    # build the index from the journal nodes and persist it to ./storage
    nodes, docs = create_journal_nodes(OBSIDIAN_DIR)
    index = GPTVectorStoreIndex(nodes)
    index.storage_context.persist(persist_dir="./storage")

if __name__ == "__main__":
    """
    Usage: python 10_journal.py -q "what are places I ate at in March and April?"
    """
    query_engine = index.as_query_engine()

    # cli argument parser
    parser = argparse.ArgumentParser(
        prog="QueryJournal",
        description="Query my bullet journals in Obsidian using Llama Index.",
    )
    parser.add_argument(
        "-q",
        "--query",
        type=str,
        help="Ask a question answerable in my journals",
        required=True,
    )
    args = parser.parse_args()
    query = args.query

    if query:
        res = query_engine.query(query)
        print(f"Query: {query}")
        print(f"Results:\n{res}")
    else:
        print("No query provided. Exiting...")
        sys.exit(0)
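For reference, read_journal_md expects daily notes whose entries are pipe-delimited rows; python-markdown leaves such rows as plain text inside a <p> tag (the tables extension is not enabled), which is why the filter counts `|` characters. Below is a minimal sketch of the extraction step in isolation; the sample entry text is a hypothetical illustration, not taken from this commit:

import re

# hypothetical journal row, as BeautifulSoup would return the <p> text
entry = "| 2023-03-14 | lunch | ate at Joe's Diner |"

# same filter and regex as read_journal_md
if entry.count("|") > 1:
    # drops everything up to and including the first `|`
    print(re.sub(r"^[^|]*\|", "", entry))
    # -> " 2023-03-14 | lunch | ate at Joe's Diner |"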

README.md (+1 line)
@@ -15,6 +15,7 @@ Learn LangChain from my YouTube channel:

| 5 | Query any website with LLamaIndex + GPT3 (ft. Chromadb, Trafilatura) | [Tutorial Video](https://youtu.be/6K1lyyzpxtk) | 11:11 |
| 6 | Locally-hosted, offline LLM w/LlamaIndex + OPT (open source, instruction-tuning LLM) | [Tutorial Video](https://youtu.be/qAvHs6UNb2k) | 32:27 |
| 7 | Building an AI Language Tutor: Pinecone + LlamaIndex + GPT-3 + BeautifulSoup | [Tutorial Video](https://youtu.be/k8G1EDZgF1E) | 51:08 |
+| 8 | Building a queryable journal 💬 w/ OpenAI, markdown & LlamaIndex 🦙 | [Tutorial Video](https://youtu.be/OzDhJOR5IfQ) | 40:29 |

The full lesson playlist can be found [here](https://www.youtube.com/playlist?list=PLXsFtK46HZxUQERRbOmuGoqbMD-KWLkOS).
