Commit e294f47

ci(langchain): mark all llm cassette tests as flaky (#12206)
Ever since we re-enabled the langchain tests, we've seen a surge of flaky langchain test failures, all centered on our mocking of OpenAI/LLM network calls via vcrpy. While we work on a more robust, long-term solution, I'm marking these tests as flaky to unblock others on CI.

## Checklist
- [x] PR author has checked that all the criteria below are met
  - The PR description includes an overview of the change
  - The PR description articulates the motivation for the change
  - The change includes tests OR the PR description describes a testing strategy
  - The PR description notes risks associated with the change, if any
  - Newly-added code is easy to change
  - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
  - The change includes or references documentation updates if necessary
  - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met
  - Title is accurate
  - All changes are related to the pull request's stated goal
  - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes
  - Testing strategy adequately addresses listed risks
  - Newly-added code is easy to change
  - Release note makes sense to a user of the library
  - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment
  - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
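For context, the affected tests replay pre-recorded OpenAI/LLM HTTP responses through vcrpy cassettes instead of hitting the network, and this change layers the `@flaky` marker on top of that pattern. Below is a minimal sketch of the pattern, not the exact ddtrace test code: the `tests.utils` import path for `flaky` is assumed, the `langchain_openai` fixture is assumed to come from the suite's conftest, and the snapshot marks are omitted for brevity.

```python
# Minimal sketch (assumptions noted in comments): how these langchain tests
# replay OpenAI traffic from a vcrpy cassette, and where the temporary @flaky
# marker is applied while the cassette-related flakiness is investigated.
import os

import pytest
import vcr

from tests.utils import flaky  # assumed import path for the flaky helper


def get_request_vcr():
    # Replay pre-recorded responses from the cassettes directory; with
    # record_mode="none", an unmatched request raises instead of going online.
    return vcr.VCR(
        cassette_library_dir=os.path.join(os.path.dirname(__file__), "cassettes"),
        record_mode="none",
        match_on=["path", "method"],
    )


@pytest.fixture
def request_vcr():
    yield get_request_vcr()


@flaky(until=1754218112, reason="Problematic test that needs fixing")
def test_openai_llm_sync(langchain_openai, request_vcr):  # langchain_openai: conftest fixture
    llm = langchain_openai.OpenAI()
    # If the client's request no longer matches the recorded cassette entry,
    # vcrpy errors out and the test flakes -- hence the temporary marker above.
    with request_vcr.use_cassette("openai_completion_sync.yaml"):
        llm.invoke("Can you explain what Descartes meant by 'I think, therefore I am'?")
```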
1 parent 7ecddbe commit e294f47

File tree

2 files changed: +22 -211 lines


tests/contrib/langchain/cassettes/openai_embedding_query.yaml

Lines changed: 0 additions & 201 deletions
This file was deleted.

tests/contrib/langchain/test_langchain.py

Lines changed: 22 additions & 10 deletions
@@ -37,13 +37,15 @@ def request_vcr():
     yield get_request_vcr()
 
 
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.snapshot(ignores=IGNORE_FIELDS)
 def test_openai_llm_sync(langchain_openai, request_vcr):
     llm = langchain_openai.OpenAI()
     with request_vcr.use_cassette("openai_completion_sync.yaml"):
         llm.invoke("Can you explain what Descartes meant by 'I think, therefore I am'?")
 
 
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.snapshot(ignores=IGNORE_FIELDS)
 def test_openai_llm_sync_multiple_prompts(langchain_openai, request_vcr):
     llm = langchain_openai.OpenAI()
@@ -56,6 +58,7 @@ def test_openai_llm_sync_multiple_prompts(langchain_openai, request_vcr):
         )
 
 
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.asyncio
 @pytest.mark.snapshot(ignores=IGNORE_FIELDS)
 async def test_openai_llm_async(langchain_openai, request_vcr):
@@ -64,6 +67,7 @@ async def test_openai_llm_async(langchain_openai, request_vcr):
         await llm.agenerate(["Which team won the 2019 NBA finals?"])
 
 
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.snapshot(ignores=IGNORE_FIELDS)
 def test_openai_llm_error(langchain, langchain_openai, request_vcr):
     import openai  # Imported here because the os env OPENAI_API_KEY needs to be set via langchain fixture before import
@@ -79,6 +83,7 @@ def test_openai_llm_error(langchain, langchain_openai, request_vcr):
         llm.generate([12345, 123456])
 
 
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.skipif(LANGCHAIN_VERSION < (0, 2), reason="Requires separate cassette for langchain v0.1")
 @pytest.mark.snapshot
 def test_cohere_llm_sync(langchain_cohere, request_vcr):
@@ -87,6 +92,7 @@ def test_cohere_llm_sync(langchain_cohere, request_vcr):
         llm.invoke("What is the secret Krabby Patty recipe?")
 
 
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.skipif(
     LANGCHAIN_VERSION < (0, 2) or sys.version_info < (3, 10),
     reason="Requires separate cassette for langchain v0.1, Python 3.9",
@@ -186,8 +192,7 @@ async def test_openai_chat_model_async_generate(langchain_openai, request_vcr):
 def test_openai_embedding_query(langchain_openai, request_vcr):
     with mock.patch("langchain_openai.OpenAIEmbeddings._get_len_safe_embeddings", return_value=[0.0] * 1536):
         embeddings = langchain_openai.OpenAIEmbeddings()
-        with request_vcr.use_cassette("openai_embedding_query.yaml"):
-            embeddings.embed_query("this is a test query.")
+        embeddings.embed_query("this is a test query.")
 
 
 @pytest.mark.snapshot
@@ -227,6 +232,7 @@ def test_pinecone_vectorstore_similarity_search(langchain_openai, request_vcr):
         vectorstore.similarity_search("Who was Alan Turing?", 1)
 
 
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.snapshot(ignores=IGNORE_FIELDS)
 def test_lcel_chain_simple(langchain_core, langchain_openai, request_vcr):
     prompt = langchain_core.prompts.ChatPromptTemplate.from_messages(
@@ -239,6 +245,7 @@ def test_lcel_chain_simple(langchain_core, langchain_openai, request_vcr):
         chain.invoke({"input": "how can langsmith help with testing?"})
 
 
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.snapshot(ignores=IGNORE_FIELDS)
 def test_lcel_chain_complicated(langchain_core, langchain_openai, request_vcr):
     prompt = langchain_core.prompts.ChatPromptTemplate.from_template(
@@ -268,6 +275,7 @@ def test_lcel_chain_complicated(langchain_core, langchain_openai, request_vcr):
         chain.invoke({"topic": "chickens", "style": "a 90s rapper"})
 
 
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.asyncio
 @pytest.mark.snapshot(ignores=IGNORE_FIELDS)
 async def test_lcel_chain_simple_async(langchain_core, langchain_openai, request_vcr):
@@ -315,6 +323,7 @@ def test_lcel_chain_batch_311(langchain_core, langchain_openai, request_vcr):
         chain.batch(inputs=["chickens", "pigs"])
 
 
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.snapshot(ignores=IGNORE_FIELDS)
 def test_lcel_chain_nested(langchain_core, langchain_openai, request_vcr):
     """
@@ -367,6 +376,7 @@ def test_lcel_chain_non_dict_input(langchain_core):
     sequence.invoke(1)
 
 
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.snapshot(ignores=IGNORE_FIELDS)
 def test_lcel_with_tools_openai(langchain_core, langchain_openai, request_vcr):
     import langchain_core.tools
@@ -387,6 +397,7 @@ def add(a: int, b: int) -> int:
         llm_with_tools.invoke("What is the sum of 1 and 2?")
 
 
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.snapshot(ignores=IGNORE_FIELDS)
 def test_lcel_with_tools_anthropic(langchain_core, langchain_anthropic, request_vcr):
     import langchain_core.tools
@@ -413,16 +424,15 @@ def test_faiss_vectorstore_retrieval(langchain_community, langchain_openai, requ
         pytest.skip("langchain-community not installed which is required for this test.")
     pytest.importorskip("faiss", reason="faiss required for this test.")
     with mock.patch("langchain_openai.OpenAIEmbeddings._get_len_safe_embeddings", return_value=[[0.0] * 1536]):
-        with request_vcr.use_cassette("openai_embedding_query.yaml"):
-            faiss = langchain_community.vectorstores.faiss.FAISS.from_texts(
-                ["this is a test query."],
-                embedding=langchain_openai.OpenAIEmbeddings(),
-            )
-            retriever = faiss.as_retriever()
+        faiss = langchain_community.vectorstores.faiss.FAISS.from_texts(
+            ["this is a test query."], embedding=langchain_openai.OpenAIEmbeddings()
+        )
+        retriever = faiss.as_retriever()
     with request_vcr.use_cassette("openai_retrieval_embedding.yaml"):
         retriever.invoke("What was the message of the last test query?")
 
 
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.snapshot(ignores=IGNORE_FIELDS)
 def test_streamed_chain(langchain_core, langchain_openai, streamed_response_responder):
     client = streamed_response_responder(
@@ -444,6 +454,7 @@ def test_streamed_chain(langchain_core, langchain_openai, streamed_response_resp
         pass
 
 
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.snapshot(ignores=IGNORE_FIELDS)
 def test_streamed_chat(langchain_openai, streamed_response_responder):
     client = streamed_response_responder(
@@ -459,6 +470,7 @@ def test_streamed_chat(langchain_openai, streamed_response_responder):
         pass
 
 
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.snapshot(ignores=IGNORE_FIELDS)
 def test_streamed_llm(langchain_openai, streamed_response_responder):
     client = streamed_response_responder(
@@ -520,6 +532,7 @@ async def test_astreamed_chat(langchain_openai, async_streamed_response_responde
         pass
 
 
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.snapshot(
     ignores=IGNORE_FIELDS,
     token="tests.contrib.langchain.test_langchain.test_streamed_llm",
@@ -539,8 +552,7 @@ async def test_astreamed_llm(langchain_openai, async_streamed_response_responder
         pass
 
 
-# TODO: needs fixing in follow-up
-@pytest.mark.skip(reason="Problematic test that needs fixing")
+@flaky(until=1754218112, reason="Problematic test that needs fixing")
 @pytest.mark.snapshot(ignores=(IGNORE_FIELDS + ["meta.langchain.request.inputs.0"]))
 def test_streamed_json_output_parser(langchain, langchain_core, langchain_openai, streamed_response_responder):
     client = streamed_response_responder(

0 commit comments
