Skip to content

Commit d10539a

Browse files
committed
CAMEL-21719: Neo4j Embedding Data transformer for RAG results
1 parent 96df20b commit d10539a

File tree

12 files changed

+285
-37
lines changed

12 files changed

+285
-37
lines changed

catalog/camel-catalog/src/generated/resources/org/apache/camel/catalog/transformers.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ google-storage-application-cloudevents
2929
http-application-cloudevents
3030
milvus-embeddings
3131
neo4j-embeddings
32+
neo4j-rag
3233
pinecone-embeddings
3334
protobuf-binary
3435
protobuf-x-java-object
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"transformer": {
3+
"kind": "transformer",
4+
"name": "neo4j:rag",
5+
"title": "Neo4j (Rag)",
6+
"description": "Prepares the similarity search LangChain4j embeddings to become a List of String for LangChain4j RAG",
7+
"deprecated": false,
8+
"javaType": "org.apache.camel.component.neo4j.transformer.Neo4jReverseEmbeddingsDataTypeTransformer",
9+
"groupId": "org.apache.camel",
10+
"artifactId": "camel-neo4j",
11+
"version": "4.10.0-SNAPSHOT"
12+
}
13+
}
14+

components/camel-ai/camel-langchain4j-embeddings/src/test/java/org/apache/camel/component/langchain4j/embeddings/LangChain4jEmbeddingsComponentNeo4jTargetIT.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
*/
1717
package org.apache.camel.component.langchain4j.embeddings;
1818

19+
import java.util.Collection;
1920
import java.util.List;
2021
import java.util.Map;
2122

@@ -28,6 +29,7 @@
2829
import org.apache.camel.component.neo4j.Neo4Operation;
2930
import org.apache.camel.component.neo4j.Neo4jComponent;
3031
import org.apache.camel.component.neo4j.Neo4jConstants;
32+
import org.apache.camel.spi.DataType;
3133
import org.apache.camel.test.infra.neo4j.services.Neo4jService;
3234
import org.apache.camel.test.infra.neo4j.services.Neo4jServiceFactory;
3335
import org.apache.camel.test.junit5.CamelTestSupport;
@@ -47,7 +49,7 @@
4749
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
4850
public class LangChain4jEmbeddingsComponentNeo4jTargetIT extends CamelTestSupport {
4951

50-
public static final String NEO4J_URI = "neo4j:neo4j";
52+
public static final String NEO4J_URI = "neo4j:neo4j?vectorIndexName=myIndex&label=Test";
5153
@RegisterExtension
5254
static Neo4jService NEO4J = Neo4jServiceFactory.createSingletonService();
5355

@@ -138,6 +140,20 @@ void testSearchEmbedding() {
138140

139141
}
140142

143+
@Test
144+
@Order(3)
145+
public void rag_similarity_search() {
146+
Exchange result = fluentTemplate.to("direct:search")
147+
.withBody("hi")
148+
.request(Exchange.class);
149+
150+
assertThat(result).isNotNull();
151+
assertThat(result.getException()).isNull();
152+
153+
assertThat(result.getIn().getBody()).isInstanceOfSatisfying(Collection.class, c -> assertThat(c).hasSize(1));
154+
assertTrue(result.getIn().getBody(List.class).contains("hi"));
155+
}
156+
141157
@Override
142158
protected RoutesBuilder createRouteBuilder() {
143159
return new RouteBuilder() {
@@ -149,6 +165,17 @@ public void configure() {
149165
.setHeader(Neo4jConstants.Headers.LABEL).constant("Test")
150166
.transform(new org.apache.camel.spi.DataType("neo4j:embeddings"))
151167
.to(NEO4J_URI);
168+
169+
from("direct:search")
170+
.to("langchain4j-embeddings:test")
171+
// transform prompt into embeddings for search
172+
.transform(
173+
new DataType("neo4j:embeddings"))
174+
.setHeader(Neo4jConstants.Headers.OPERATION, constant(Neo4Operation.VECTOR_SIMILARITY_SEARCH))
175+
.to(NEO4J_URI)
176+
// decode retrieved embeddings for RAG
177+
.transform(
178+
new DataType("neo4j:rag"));
152179
}
153180
};
154181
}

components/camel-ai/camel-neo4j/src/generated/resources/META-INF/services/org/apache/camel/transformer.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Generated by camel build tools - do NOT edit this file!
2-
transformers=neo4j:embeddings
2+
transformers=neo4j:embeddings neo4j:rag
33
groupId=org.apache.camel
44
artifactId=camel-neo4j
55
version=4.10.0-SNAPSHOT
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Generated by camel build tools - do NOT edit this file!
2+
class=org.apache.camel.component.neo4j.transformer.Neo4jReverseEmbeddingsDataTypeTransformer
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"transformer": {
3+
"kind": "transformer",
4+
"name": "neo4j:rag",
5+
"title": "Neo4j (Rag)",
6+
"description": "Prepares the similarity search LangChain4j embeddings to become a List of String for LangChain4j RAG",
7+
"deprecated": false,
8+
"javaType": "org.apache.camel.component.neo4j.transformer.Neo4jReverseEmbeddingsDataTypeTransformer",
9+
"groupId": "org.apache.camel",
10+
"artifactId": "camel-neo4j",
11+
"version": "4.10.0-SNAPSHOT"
12+
}
13+
}
14+

components/camel-ai/camel-neo4j/src/main/docs/neo4j-component.adoc

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -223,16 +223,20 @@ The URI endpoint should contain also specify the vector index name.
223223
=== Create a vector
224224
To create a vector in a database named `test`, use the operation `CREATE_VECTOR`.
225225
The URI endpoint should also specify the label, the alias and the vector index name.
226+
Put the vector array in the `CamelLangChain4jEmbeddingsVector` header, and the corresponding text in the body.
227+
The `id` is optional: when the `Neo4jConstants.Headers.VECTOR_ID` header is not set, Camel Neo4j generates a random UUID.
228+
229+
Camel Neo4j will create the node and store the vector as an `embedding` property, the text as a `text` property and the `id` as an `id` property.
226230

227-
Camel Neo4j will create the node and store the vector as an `embedding` property.
228231

229232
.Example:
230233
[source,java]
231234
----
232-
Exchange result = fluentTemplate.to("neo4j:test?vectorIndexName=movieIdx&label=Movie&alias=m")
233-
.withHeader(Neo4j.Headers.OPERATION, Neo4Operation.CREATE_VECTOR)
234-
.withHeader(Neo4j.Headers.VECTOR_ID, testData.getId())
235-
.withBody(List.of(0.8f, 0.6f))
235+
Exchange result = fluentTemplate.to("neo4j:test?vectorIndexName=myIndex&label=Test&alias=t")
236+
.withHeader(Neo4jConstants.Headers.OPERATION, Neo4Operation.CREATE_VECTOR)
237+
.withHeader(Neo4jConstants.Headers.VECTOR_ID, "1")
238+
.withHeader("CamelLangChain4jEmbeddingsVector", new float[] { 10.8f, 10.6f })
239+
.withBody("Hello World!")
236240
.request(Exchange.class);
237241
----
238242

@@ -244,23 +248,48 @@ The URI endpoint should also specify the label, the alias and the vector index n
244248
.Example:
245249
[source,java]
246250
----
247-
Exchange result = fluentTemplate.to("neo4j:test?vectorIndexName=movieIdx&label=Movie&alias=m")
248-
.withHeader(Neo4j.Headers.OPERATION, Neo4Operation.CREATE_VECTOR)
249-
.withHeader(Neo4j.Headers.VECTOR_ID, testData.getId())
251+
Exchange result = fluentTemplate.to("neo4j:test?vectorIndexName=myIndex&label=Test&alias=t")
252+
.withHeader(Neo4jConstants.Headers.OPERATION, Neo4Operation.VECTOR_SIMILARITY_SEARCH)
250253
.withBody(List.of(0.75f, 0.65f))
251254
.request(Exchange.class);
252255
----
253256

254257
== Generate Embeddings with Langchain4j-embeddings
255258
You can generate embeddings with an Embedding Model using the Camel Langchain4j Embeddings component. Camel Neo4j introduces a DataType `neo4j:embeddings` that automates the transformation of the Langchain4j embeddings to Neo4j vectors.
256259

257-
.Example of a camel Route that create embeddings with Camel Langchain4j Embeddings
260+
.Example of a Camel route that creates embeddings with Camel Langchain4j Embeddings and ingests them into a Neo4j database.
258261
[source,java]
259262
----
260263
from("direct:in")
261264
.to("langchain4j-embeddings:test")
262265
.setHeader(Neo4jConstants.Headers.OPERATION).constant(Neo4Operation.CREATE_VECTOR)
263266
.setHeader(Neo4jConstants.Headers.LABEL).constant("Test")
264267
.transform(new DataType("neo4j:embeddings"))
265-
.to("neo4j:test");
268+
.to("neo4j:neo4j?vectorIndexName=myIndex&label=Test");
266269
----
270+
271+
== Similarity Search for LangChain4j RAG
272+
You can enhance the Camel LangChain4j chat RAG experience by integrating Neo4j similarity search with Camel Neo4j DataTypes.
273+
274+
To achieve this, use the `neo4j:embeddings` DataType to generate embeddings from the prompt. These embeddings will then be utilized for the similarity search operation.
275+
276+
Next, use the `neo4j:rag` DataType to convert the retrieved embeddings into a `List<String>` for RAG. This list can be directly used with the `LangChain4jRagAggregatorStrategy` from the LangChain4j chat component.
277+
278+
NOTE: The retrieved embeddings must be ingested in Neo4j as LangChain4j embeddings.
279+
280+
.Example of a camel Route that performs a similarity search in the Vector index, using a string and returning a list of strings
281+
[source,java]
282+
----
283+
from("direct:search")
284+
.to("langchain4j-embeddings:test")
285+
// transform prompt into embeddings for search
286+
.transform(
287+
new DataType("neo4j:embeddings"))
288+
.setHeader(Neo4jConstants.Headers.OPERATION, constant(Neo4Operation.VECTOR_SIMILARITY_SEARCH))
289+
.to("neo4j:neo4j?vectorIndexName=myIndex&label=Test")
290+
// decode retrieved embeddings for RAG
291+
.transform(
292+
new DataType("neo4j:rag"));
293+
----
294+
295+
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.camel.component.neo4j;
18+
19+
/**
 * Class that represents the embedding to persist when using LangChain4j - The names of the properties correspond to the
 * ones in LangChain4j project for compatibility.
 * <p>
 * All fields are assigned once in the constructor and never reassigned. The vector array is stored and returned by
 * reference (no defensive copy), so callers must not modify it after handing it over.
 */
public class Neo4jEmbedding {
    private final String id;

    private final String text;

    private final float[] vectors;

    /**
     * Creates an embedding holder.
     *
     * @param id      the identifier of the embedding
     * @param text    the text the embedding was computed from
     * @param vectors the embedding vector; kept by reference, not copied
     */
    public Neo4jEmbedding(String id, String text, float[] vectors) {
        this.id = id;
        this.text = text;
        this.vectors = vectors;
    }

    /**
     * @return the identifier passed to the constructor
     */
    public String getId() {
        return id;
    }

    /**
     * @return the text passed to the constructor
     */
    public String getText() {
        return text;
    }

    /**
     * @return the same vector array instance that was passed to the constructor
     */
    public float[] getVectors() {
        return vectors;
    }
}

components/camel-ai/camel-neo4j/src/main/java/org/apache/camel/component/neo4j/Neo4jProducer.java

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.camel.InvalidPayloadException;
2626
import org.apache.camel.Message;
2727
import org.apache.camel.NoSuchHeaderException;
28+
import org.apache.camel.ai.CamelLangchain4jAttributes;
2829
import org.apache.camel.support.DefaultProducer;
2930
import org.apache.camel.util.ObjectHelper;
3031
import org.neo4j.driver.Driver;
@@ -242,37 +243,60 @@ private void dropVectorIndex(Exchange exchange) {
242243

243244
private void createVector(Exchange exchange) {
244245
final String alias
245-
= getEndpoint().getConfiguration().getAlias() != null ? getEndpoint().getConfiguration().getAlias() : "x";
246+
= getEndpoint().getConfiguration().getAlias() != null ? getEndpoint().getConfiguration().getAlias() : "e";
246247

247-
final String label = exchange.getMessage().getHeader(Neo4jConstants.Headers.LABEL,
248-
() -> getEndpoint().getConfiguration().getLabel(), String.class);
249-
ObjectHelper.notNull(label, "label");
250-
251-
final String id
252-
= exchange.getMessage().getHeader(Neo4jConstants.Headers.VECTOR_ID, () -> UUID.randomUUID(), String.class);
248+
final String label
249+
= getEndpoint().getConfiguration().getLabel() != null
250+
? getEndpoint().getConfiguration().getLabel() : "Embedding";
253251

254-
final float[] body = exchange.getMessage().getBody(float[].class);
252+
String id;
253+
String text;
254+
float[] vectors;
255255

256256
final String databaseName = getEndpoint().getName();
257257

258+
Object body = exchange.getMessage().getBody();
259+
260+
if (body instanceof Neo4jEmbedding) {
261+
id = ((Neo4jEmbedding) body).getId();
262+
text = ((Neo4jEmbedding) body).getText();
263+
vectors = ((Neo4jEmbedding) body).getVectors();
264+
} else {
265+
id = exchange.getMessage().getHeader(Neo4jConstants.Headers.VECTOR_ID, () -> UUID.randomUUID(), String.class);
266+
vectors = exchange.getMessage().getHeader(CamelLangchain4jAttributes.CAMEL_LANGCHAIN4J_EMBEDDING_VECTOR,
267+
float[].class);
268+
text = exchange.getMessage().getBody(String.class);
269+
}
270+
271+
ObjectHelper.notNull(text, "text");
272+
ObjectHelper.notNull(vectors, "vectors");
273+
258274
String query = String.format("""
259-
MERGE (%s:%s {id: $id})
275+
MERGE (%s:%s {id: $id, text: $text})
260276
WITH %s
261277
CALL db.create.setNodeVectorProperty(%s, 'embedding', $embedding);
262278
""", alias, label, alias, alias);
263279

264280
Map<String, Object> params = Map.of(
265-
"embedding", Values.value(body),
266-
"id", id);
281+
"embedding", Values.value(vectors),
282+
"id", id,
283+
"text", text);
267284

268285
executeWriteQuery(exchange, query, params, databaseName, Neo4Operation.CREATE_VECTOR);
269286
}
270287

271-
public void similaritySearch(Exchange exchange) {
288+
public void similaritySearch(Exchange exchange) throws InvalidPayloadException {
272289
final String vectorIndexName = getEndpoint().getConfiguration().getVectorIndexName();
273290
ObjectHelper.notNull(vectorIndexName, "vectorIndexName");
274291

275-
final float[] body = exchange.getMessage().getBody(float[].class);
292+
float[] vectors;
293+
294+
Object body = exchange.getMessage().getMandatoryBody();
295+
if (body instanceof Neo4jEmbedding) {
296+
vectors = ((Neo4jEmbedding) body).getVectors();
297+
} else {
298+
vectors = exchange.getMessage().getBody(float[].class);
299+
}
276300

277301
final double minScore = getEndpoint().getConfiguration().getMinScore();
278302

@@ -288,7 +312,7 @@ public void similaritySearch(Exchange exchange) {
288312
""";
289313

290314
Map<String, Object> params = Map.of("indexName", vectorIndexName,
291-
"embeddingValue", body,
315+
"embeddingValue", vectors,
292316
"minScore", minScore,
293317
"maxResults", maxResults);
294318

components/camel-ai/camel-neo4j/src/main/java/org/apache/camel/component/neo4j/transformer/Neo4jEmbeddingDataTypeTransformer.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,14 @@
1616
*/
1717
package org.apache.camel.component.neo4j.transformer;
1818

19+
import java.util.UUID;
20+
1921
import dev.langchain4j.data.embedding.Embedding;
22+
import dev.langchain4j.data.segment.TextSegment;
2023
import org.apache.camel.Message;
2124
import org.apache.camel.ai.CamelLangchain4jAttributes;
25+
import org.apache.camel.component.neo4j.Neo4jConstants;
26+
import org.apache.camel.component.neo4j.Neo4jEmbedding;
2227
import org.apache.camel.spi.DataType;
2328
import org.apache.camel.spi.DataTypeTransformer;
2429
import org.apache.camel.spi.Transformer;
@@ -28,7 +33,15 @@
2833
public class Neo4jEmbeddingDataTypeTransformer extends Transformer {
2934
@Override
3035
public void transform(Message message, DataType fromType, DataType toType) {
31-
Embedding embedding = message.getHeader(CamelLangchain4jAttributes.CAMEL_LANGCHAIN4J_EMBEDDING_VECTOR, Embedding.class);
32-
message.setBody(embedding.vector());
36+
final Embedding embedding
37+
= message.getHeader(CamelLangchain4jAttributes.CAMEL_LANGCHAIN4J_EMBEDDING_VECTOR, Embedding.class);
38+
39+
final TextSegment text = message.getBody(TextSegment.class);
40+
41+
final String id = message.getHeader(Neo4jConstants.Headers.VECTOR_ID, () -> UUID.randomUUID(), String.class);
42+
43+
Neo4jEmbedding neo4jEmbedding = new Neo4jEmbedding(id, text.text(), embedding.vector());
44+
45+
message.setBody(neo4jEmbedding);
3346
}
3447
}

0 commit comments

Comments
 (0)