Skip to content

Commit 27411b1

Browse files
sabrenner and github-actions[bot]
authored and committed
ci(langchain): mark flaky test (#12190)
Marking a flaky test for `main`. This test will be removed entirely in deprecation once the `3.x-staging` branch is merged. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) (cherry picked from commit 898e38c)
1 parent aa1fbaa commit 27411b1

File tree

1 file changed

+40
-0
lines changed

1 file changed

+40
-0
lines changed

Diff for: tests/contrib/langchain/test_langchain.py

+40
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from ddtrace.internal.utils.version import parse_version
1010
from tests.contrib.langchain.utils import get_request_vcr
1111
from tests.contrib.langchain.utils import long_input_text
12+
from tests.utils import flaky
1213
from tests.utils import override_global_config
1314

1415

@@ -24,6 +25,7 @@ def request_vcr():
2425
yield get_request_vcr(subdirectory_name="langchain")
2526

2627

28+
@flaky(1835812000, reason="broken test that will be fixed soon")
2729
@pytest.mark.parametrize("ddtrace_config_langchain", [dict(logs_enabled=True, log_prompt_completion_sample_rate=1.0)])
2830
def test_global_tags(ddtrace_config_langchain, langchain, request_vcr, mock_metrics, mock_logs, mock_tracer):
2931
"""
@@ -74,6 +76,7 @@ def test_global_tags(ddtrace_config_langchain, langchain, request_vcr, mock_metr
7476
)
7577

7678

79+
@flaky(1835812000, reason="broken test that will be fixed soon")
7780
@pytest.mark.skipif(PY39, reason="Python 3.10+ specific test")
7881
@pytest.mark.snapshot(ignores=["metrics.langchain.tokens.total_cost", "resource"])
7982
def test_openai_llm_sync(langchain, request_vcr):
@@ -82,6 +85,7 @@ def test_openai_llm_sync(langchain, request_vcr):
8285
llm("Can you explain what Descartes meant by 'I think, therefore I am'?")
8386

8487

88+
@flaky(1835812000, reason="broken test that will be fixed soon")
8589
@pytest.mark.skipif(not PY39, reason="Python 3.9 specific test")
8690
@pytest.mark.snapshot(ignores=["metrics.langchain.tokens.total_cost"])
8791
def test_openai_llm_sync_39(langchain, request_vcr):
@@ -90,6 +94,7 @@ def test_openai_llm_sync_39(langchain, request_vcr):
9094
llm("Can you explain what Descartes meant by 'I think, therefore I am'?")
9195

9296

97+
@flaky(1835812000, reason="broken test that will be fixed soon")
9398
@pytest.mark.skipif(PY39, reason="Python 3.10+ specific test")
9499
@pytest.mark.snapshot(ignores=["resource"])
95100
def test_openai_llm_sync_multiple_prompts(langchain, request_vcr):
@@ -103,6 +108,7 @@ def test_openai_llm_sync_multiple_prompts(langchain, request_vcr):
103108
)
104109

105110

111+
@flaky(1835812000, reason="broken test that will be fixed soon")
106112
@pytest.mark.skipif(not PY39, reason="Python 3.9 specific test")
107113
@pytest.mark.snapshot
108114
def test_openai_llm_sync_multiple_prompts_39(langchain, request_vcr):
@@ -116,6 +122,7 @@ def test_openai_llm_sync_multiple_prompts_39(langchain, request_vcr):
116122
)
117123

118124

125+
@flaky(1835812000, reason="broken test that will be fixed soon")
119126
@pytest.mark.asyncio
120127
@pytest.mark.snapshot(ignores=["resource", "langchain.request.openai.parameters.request_timeout"])
121128
async def test_openai_llm_async(langchain, request_vcr):
@@ -125,6 +132,7 @@ async def test_openai_llm_async(langchain, request_vcr):
125132
await llm.agenerate(["Which team won the 2019 NBA finals?"])
126133

127134

135+
@flaky(1835812000, reason="broken test that will be fixed soon")
128136
@pytest.mark.snapshot(ignores=["meta.error.stack", "resource"])
129137
def test_openai_llm_error(langchain, request_vcr):
130138
import openai # Imported here because the os env OPENAI_API_KEY needs to be set via langchain fixture before import
@@ -140,13 +148,15 @@ def test_openai_llm_error(langchain, request_vcr):
140148
llm.generate([12345, 123456])
141149

142150

151+
@flaky(1835812000, reason="broken test that will be fixed soon")
143152
@pytest.mark.snapshot(ignores=["resource"])
144153
def test_cohere_llm_sync(langchain, request_vcr):
145154
llm = langchain.llms.Cohere(cohere_api_key=os.getenv("COHERE_API_KEY", "<not-a-real-key>"))
146155
with request_vcr.use_cassette("cohere_completion_sync.yaml"):
147156
llm("What is the secret Krabby Patty recipe?")
148157

149158

159+
@flaky(1835812000, reason="broken test that will be fixed soon")
150160
@pytest.mark.snapshot(ignores=["resource"])
151161
def test_huggingfacehub_llm_sync(langchain, request_vcr):
152162
llm = langchain.llms.HuggingFaceHub(
@@ -158,6 +168,7 @@ def test_huggingfacehub_llm_sync(langchain, request_vcr):
158168
llm("Why does Mr. Krabs have a whale daughter?")
159169

160170

171+
@flaky(1835812000, reason="broken test that will be fixed soon")
161172
@pytest.mark.snapshot(ignores=["meta.langchain.response.completions.0.text", "resource"])
162173
def test_ai21_llm_sync(langchain, request_vcr):
163174
llm = langchain.llms.AI21(ai21_api_key=os.getenv("AI21_API_KEY", "<not-a-real-key>"))
@@ -166,6 +177,7 @@ def test_ai21_llm_sync(langchain, request_vcr):
166177
llm("Why does everyone in Bikini Bottom hate Plankton?")
167178

168179

180+
@flaky(1835812000, reason="broken test that will be fixed soon")
169181
def test_openai_llm_metrics(langchain, request_vcr, mock_metrics, mock_logs, snapshot_tracer):
170182
llm = langchain.llms.OpenAI(model="text-davinci-003")
171183
cassette_name = "openai_completion_sync_39.yaml" if PY39 else "openai_completion_sync.yaml"
@@ -194,6 +206,7 @@ def test_openai_llm_metrics(langchain, request_vcr, mock_metrics, mock_logs, sna
194206
mock_logs.assert_not_called()
195207

196208

209+
@flaky(1835812000, reason="broken test that will be fixed soon")
197210
@pytest.mark.parametrize(
198211
"ddtrace_config_langchain",
199212
[dict(metrics_enabled=False, logs_enabled=True, log_prompt_completion_sample_rate=1.0)],
@@ -227,6 +240,7 @@ def test_llm_logs(langchain, ddtrace_config_langchain, request_vcr, mock_logs, m
227240
mock_metrics.count.assert_not_called()
228241

229242

243+
@flaky(1835812000, reason="broken test that will be fixed soon")
230244
@pytest.mark.skipif(PY39, reason="Python 3.10+ specific test")
231245
@pytest.mark.snapshot(
232246
token="tests.contrib.langchain.test_langchain.test_openai_chat_model_call",
@@ -238,6 +252,7 @@ def test_openai_chat_model_sync_call(langchain, request_vcr):
238252
chat(messages=[langchain.schema.HumanMessage(content="When do you use 'whom' instead of 'who'?")])
239253

240254

255+
@flaky(1835812000, reason="broken test that will be fixed soon")
241256
@pytest.mark.skipif(not PY39, reason="Python 3.9 specific test")
242257
@pytest.mark.snapshot(ignores=["metrics.langchain.tokens.total_cost"])
243258
def test_openai_chat_model_sync_call_39(langchain, request_vcr):
@@ -246,6 +261,7 @@ def test_openai_chat_model_sync_call_39(langchain, request_vcr):
246261
chat([langchain.schema.HumanMessage(content="When do you use 'whom' instead of 'who'?")])
247262

248263

264+
@flaky(1835812000, reason="broken test that will be fixed soon")
249265
@pytest.mark.skipif(PY39, reason="Python 3.10+ specific test")
250266
@pytest.mark.snapshot(
251267
token="tests.contrib.langchain.test_langchain.test_openai_chat_model_generate",
@@ -270,6 +286,7 @@ def test_openai_chat_model_sync_generate(langchain, request_vcr):
270286
)
271287

272288

289+
@flaky(1835812000, reason="broken test that will be fixed soon")
273290
@pytest.mark.skipif(not PY39, reason="Python 3.9 specific test")
274291
@pytest.mark.snapshot(ignores=["metrics.langchain.tokens.total_cost"])
275292
def test_openai_chat_model_sync_generate_39(langchain, request_vcr):
@@ -291,6 +308,7 @@ def test_openai_chat_model_sync_generate_39(langchain, request_vcr):
291308
)
292309

293310

311+
@flaky(1835812000, reason="broken test that will be fixed soon")
294312
@pytest.mark.asyncio
295313
@pytest.mark.snapshot(
296314
token="tests.contrib.langchain.test_langchain.test_openai_chat_model_call",
@@ -302,6 +320,7 @@ async def test_openai_chat_model_async_call(langchain, request_vcr):
302320
await chat._call_async([langchain.schema.HumanMessage(content="When do you use 'whom' instead of 'who'?")])
303321

304322

323+
@flaky(1835812000, reason="broken test that will be fixed soon")
305324
@pytest.mark.asyncio
306325
@pytest.mark.snapshot(
307326
token="tests.contrib.langchain.test_langchain.test_openai_chat_model_generate",
@@ -326,6 +345,7 @@ async def test_openai_chat_model_async_generate(langchain, request_vcr):
326345
)
327346

328347

348+
@flaky(1835812000, reason="broken test that will be fixed soon")
329349
def test_chat_model_metrics(langchain, request_vcr, mock_metrics, mock_logs, snapshot_tracer):
330350
chat = langchain.chat_models.ChatOpenAI(temperature=0, max_tokens=256)
331351
cassette_name = "openai_chat_completion_sync_call_39.yaml" if PY39 else "openai_chat_completion_sync_call.yaml"
@@ -354,6 +374,7 @@ def test_chat_model_metrics(langchain, request_vcr, mock_metrics, mock_logs, sna
354374
mock_logs.assert_not_called()
355375

356376

377+
@flaky(1835812000, reason="broken test that will be fixed soon")
357378
@pytest.mark.parametrize(
358379
"ddtrace_config_langchain",
359380
[dict(metrics_enabled=False, logs_enabled=True, log_prompt_completion_sample_rate=1.0)],
@@ -387,6 +408,7 @@ def test_chat_model_logs(langchain, ddtrace_config_langchain, request_vcr, mock_
387408
mock_metrics.count.assert_not_called()
388409

389410

411+
@flaky(1835812000, reason="broken test that will be fixed soon")
390412
@pytest.mark.snapshot
391413
def test_openai_embedding_query(langchain, request_vcr):
392414
embeddings = langchain.embeddings.OpenAIEmbeddings()
@@ -395,6 +417,7 @@ def test_openai_embedding_query(langchain, request_vcr):
395417
embeddings.embed_query("this is a test query.")
396418

397419

420+
@flaky(1835812000, reason="broken test that will be fixed soon")
398421
@pytest.mark.skip(reason="Tiktoken request to get model encodings cannot be made in CI")
399422
@pytest.mark.snapshot
400423
def test_openai_embedding_document(langchain, request_vcr):
@@ -416,6 +439,7 @@ def test_fake_embedding_document(langchain):
416439
embeddings.embed_documents(texts=["foo", "bar"])
417440

418441

442+
@flaky(1835812000, reason="broken test that will be fixed soon")
419443
def test_openai_embedding_metrics(langchain, request_vcr, mock_metrics, mock_logs, snapshot_tracer):
420444
embeddings = langchain.embeddings.OpenAIEmbeddings()
421445
cassette_name = "openai_embedding_query_39.yaml" if PY39 else "openai_embedding_query.yaml"
@@ -438,6 +462,7 @@ def test_openai_embedding_metrics(langchain, request_vcr, mock_metrics, mock_log
438462
mock_logs.assert_not_called()
439463

440464

465+
@flaky(1835812000, reason="broken test that will be fixed soon")
441466
@pytest.mark.parametrize(
442467
"ddtrace_config_langchain",
443468
[dict(metrics_enabled=False, logs_enabled=True, log_prompt_completion_sample_rate=1.0)],
@@ -470,6 +495,7 @@ def test_embedding_logs(langchain, ddtrace_config_langchain, request_vcr, mock_l
470495
mock_metrics.count.assert_not_called()
471496

472497

498+
@flaky(1835812000, reason="broken test that will be fixed soon")
473499
@pytest.mark.snapshot(
474500
token="tests.contrib.langchain.test_langchain.test_openai_math_chain",
475501
ignores=["metrics.langchain.tokens.total_cost", "resource"],
@@ -485,6 +511,7 @@ def test_openai_math_chain_sync(langchain, request_vcr):
485511
chain.run("what is two raised to the fifty-fourth power?")
486512

487513

514+
@flaky(1835812000, reason="broken test that will be fixed soon")
488515
@pytest.mark.asyncio
489516
@pytest.mark.snapshot(
490517
token="tests.contrib.langchain.test_langchain.test_openai_math_chain",
@@ -500,6 +527,7 @@ async def test_openai_math_chain_async(langchain, request_vcr):
500527
await chain.acall("what is two raised to the fifty-fourth power?")
501528

502529

530+
@flaky(1835812000, reason="broken test that will be fixed soon")
503531
@pytest.mark.snapshot(token="tests.contrib.langchain.test_langchain.test_cohere_math_chain")
504532
def test_cohere_math_chain_sync(langchain, request_vcr):
505533
"""
@@ -513,6 +541,7 @@ def test_cohere_math_chain_sync(langchain, request_vcr):
513541
chain.run("what is thirteen raised to the .3432 power?")
514542

515543

544+
@flaky(1835812000, reason="broken test that will be fixed soon")
516545
@pytest.mark.skipif(PY39, reason="Requires unnecessary cassette file for Python 3.9")
517546
@pytest.mark.snapshot(
518547
token="tests.contrib.langchain.test_langchain.test_openai_sequential_chain",
@@ -570,6 +599,7 @@ def _transform_func(inputs):
570599
sequential_chain.run({"text": input_text, "style": "a 90s rapper"})
571600

572601

602+
@flaky(1835812000, reason="broken test that will be fixed soon")
573603
@pytest.mark.skipif(PY39, reason="Requires unnecessary cassette file for Python 3.9")
574604
@pytest.mark.snapshot(ignores=["langchain.tokens.total_cost", "resource"])
575605
def test_openai_sequential_chain_with_multiple_llm_sync(langchain, request_vcr):
@@ -599,6 +629,7 @@ def test_openai_sequential_chain_with_multiple_llm_sync(langchain, request_vcr):
599629
sequential_chain.run({"input_text": long_input_text})
600630

601631

632+
@flaky(1835812000, reason="broken test that will be fixed soon")
602633
@pytest.mark.asyncio
603634
@pytest.mark.snapshot(ignores=["resource"])
604635
async def test_openai_sequential_chain_with_multiple_llm_async(langchain, request_vcr):
@@ -627,6 +658,7 @@ async def test_openai_sequential_chain_with_multiple_llm_async(langchain, reques
627658
await sequential_chain.acall({"input_text": long_input_text})
628659

629660

661+
@flaky(1835812000, reason="broken test that will be fixed soon")
630662
def test_openai_chain_metrics(langchain, request_vcr, mock_metrics, mock_logs, snapshot_tracer):
631663
chain = langchain.chains.LLMMathChain(llm=langchain.llms.OpenAI(temperature=0))
632664
cassette_name = "openai_math_chain_sync_39.yaml" if PY39 else "openai_math_chain_sync.yaml"
@@ -655,6 +687,7 @@ def test_openai_chain_metrics(langchain, request_vcr, mock_metrics, mock_logs, s
655687
mock_logs.assert_not_called()
656688

657689

690+
@flaky(1835812000, reason="broken test that will be fixed soon")
658691
@pytest.mark.parametrize(
659692
"ddtrace_config_langchain",
660693
[dict(metrics_enabled=False, logs_enabled=True, log_prompt_completion_sample_rate=1.0)],
@@ -763,6 +796,7 @@ def test_chat_prompt_template_does_not_parse_template(langchain, mock_tracer):
763796
assert chain_span.get_tag("langchain.request.prompt") is None
764797

765798

799+
@flaky(1835812000, reason="broken test that will be fixed soon")
766800
@pytest.mark.snapshot
767801
def test_pinecone_vectorstore_similarity_search(langchain, request_vcr):
768802
"""
@@ -783,6 +817,7 @@ def test_pinecone_vectorstore_similarity_search(langchain, request_vcr):
783817
vectorstore.similarity_search("Who was Alan Turing?", 1)
784818

785819

820+
@flaky(1835812000, reason="broken test that will be fixed soon")
786821
@pytest.mark.skipif(PY39, reason="Cassette specific to Python 3.10+")
787822
@pytest.mark.snapshot
788823
def test_pinecone_vectorstore_retrieval_chain(langchain, request_vcr):
@@ -808,6 +843,7 @@ def test_pinecone_vectorstore_retrieval_chain(langchain, request_vcr):
808843
qa_with_sources("Who was Alan Turing?")
809844

810845

846+
@flaky(1835812000, reason="broken test that will be fixed soon")
811847
@pytest.mark.skipif(not PY39, reason="Cassette specific to Python 3.9")
812848
@pytest.mark.snapshot
813849
def test_pinecone_vectorstore_retrieval_chain_39(langchain, request_vcr):
@@ -833,6 +869,7 @@ def test_pinecone_vectorstore_retrieval_chain_39(langchain, request_vcr):
833869
qa_with_sources("Who was Alan Turing?")
834870

835871

872+
@flaky(1835812000, reason="broken test that will be fixed soon")
836873
def test_vectorstore_similarity_search_metrics(langchain, request_vcr, mock_metrics, mock_logs, snapshot_tracer):
837874
import pinecone
838875

@@ -863,6 +900,7 @@ def test_vectorstore_similarity_search_metrics(langchain, request_vcr, mock_metr
863900
mock_logs.assert_not_called()
864901

865902

903+
@flaky(1835812000, reason="broken test that will be fixed soon")
866904
@pytest.mark.parametrize(
867905
"ddtrace_config_langchain",
868906
[dict(metrics_enabled=False, logs_enabled=True, log_prompt_completion_sample_rate=1.0)],
@@ -924,6 +962,7 @@ def test_vectorstore_logs(langchain, ddtrace_config_langchain, request_vcr, mock
924962
mock_metrics.count.assert_not_called()
925963

926964

965+
@flaky(1835812000, reason="broken test that will be fixed soon")
927966
@pytest.mark.skipif(PY39, reason="Requires unnecessary cassette file for Python 3.9")
928967
@pytest.mark.snapshot(ignores=["metrics.langchain.tokens.total_cost", "resource"])
929968
def test_openai_integration(langchain, request_vcr, ddtrace_run_python_code_in_subprocess):
@@ -956,6 +995,7 @@ def test_openai_integration(langchain, request_vcr, ddtrace_run_python_code_in_s
956995
assert err == b""
957996

958997

998+
@flaky(1835812000, reason="broken test that will be fixed soon")
959999
@pytest.mark.skipif(PY39, reason="Requires unnecessary cassette file for Python 3.9")
9601000
@pytest.mark.snapshot(ignores=["metrics.langchain.tokens.total_cost", "resource"])
9611001
@pytest.mark.parametrize("schema_version", [None, "v0", "v1"])

0 commit comments

Comments (0)