Merge pull request #24 from BillFarber/task/jamesUpdates

rjrudin · web-flow · commit 9047d61c7223 · 2024-10-16T13:13:00.000-04:00
Get the transcript from the document instead of the view.
diff --git a/rag-langchain-python/vector_query_retriever.py b/rag-langchain-python/vector_query_retriever.py
@@ -34,6 +34,7 @@ def _build_eval_script(self, query, query_embedding):
     null, {{'scoreMethod': 'score-bm25'}}
   )
   .limit(100)
+  .bind(op.as('transcript', op.xpath('doc', '/transcript')))
   .joinInner(
     op.fromView('example','events', '', op.fragmentIdCol('vectorsDocId')),
     op.on(
@@ -45,6 +46,7 @@ def _build_eval_script(self, query, query_embedding):
     op.vec.vector(op.col('embedding')),
     op.vec.vector(vec.vector({}))
   )))
+  .select(['uri', 'transcript', 'similarity'])
   .orderBy(op.desc(op.col('similarity')))
   .limit(10)
   .result()
@@ -56,9 +58,10 @@ def _build_eval_script(self, query, query_embedding):
     def _get_relevant_documents(self, query: str) -> List[Document]:
         query_embedding = self.embedding_generator.embed_query(query)
         eval_script = self._build_eval_script(query, query_embedding)
-        results = self.client.eval(javascript=eval_script)
+        optic_rows = self.client.eval(javascript=eval_script)
+        print(optic_rows[1].keys())
 
-        print(f"Count of MarkLogic documents sent to the LLM: {len(results)}")
-        for result in results:
-            print(f"URI: {result['uri']}")
-        return map(lambda doc: Document(page_content=doc["text"]), results)
+        print(f"Count of MarkLogic chunks sent to the LLM: {len(optic_rows)}")
+        for optic_row in optic_rows:
+            print(f"URI: {optic_row['uri']}")
+        return map(lambda optic_row: Document(page_content=optic_row["transcript"]), optic_rows)
diff --git a/setup/src/main/ml-schemas-12/tde/events.json b/setup/src/main/ml-schemas-12/tde/events.json
@@ -20,11 +20,6 @@
             "val": "vec:vector(embedding)",
             "dimension": "1536",
             "invalidValues": "reject"
-          },
-          {
-            "name": "text",
-            "scalarType": "string",
-            "val": "transcript"
           }
         ]
       }

Original file line number	Diff line number	Diff line change
`@@ -20,11 +20,6 @@`
`20`	`20`	`"val": "vec:vector(embedding)",`
`21`	`21`	`"dimension": "1536",`
`22`	`22`	`"invalidValues": "reject"`
`23`		`- },`
`24`		`- {`
`25`		`- "name": "text",`
`26`		`- "scalarType": "string",`
`27`		`- "val": "transcript"`
`28`	`23`	`}`
`29`	`24`	`]`
`30`	`25`	`}`