Skip to content

Commit

Permalink
Latest tooling and deps (#842)
Browse files: browse the repository at this point in the history
  • Loading branch information
jamesbraza authored Feb 4, 2025
1 parent df401d1 commit 1bde36c
Show file tree
Hide file tree
Showing 11 changed files with 126 additions and 130 deletions.
12 changes: 6 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ repos:
- id: mixed-line-ending
- id: trailing-whitespace
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.1
rev: v0.9.4
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.10.0
rev: 25.1.0
hooks:
- id: black
- repo: https://github.com/rbubley/mirrors-prettier
Expand All @@ -40,7 +40,7 @@ repos:
hooks:
- id: toml-sort-fix
- repo: https://github.com/codespell-project/codespell
rev: v2.3.0
rev: v2.4.1
hooks:
- id: codespell
additional_dependencies: [".[toml]"]
Expand All @@ -55,15 +55,15 @@ repos:
hooks:
- id: check-mailmap
- repo: https://github.com/henryiii/validate-pyproject-schema-store
rev: 2025.01.10
rev: 2025.02.03
hooks:
- id: validate-pyproject
- repo: https://github.com/astral-sh/uv-pre-commit
rev: 0.4.30
rev: 0.5.26
hooks:
- id: uv-lock
- repo: https://github.com/renovatebot/pre-commit-hooks
rev: 39.100.1
rev: 39.158.1
hooks:
- id: renovate-config-validator
args: [--strict]
Expand Down
4 changes: 2 additions & 2 deletions paperqa/agents/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ def make_clinical_trial_status(

# SEE: https://regex101.com/r/L0L5MH/1
CLINICAL_STATUS_SEARCH_REGEX_PATTERN: str = (
r"Status: Paper Count=(\d+) \| Relevant Papers=(\d+)(?:\s\|\sClinical Trial Count=(\d+)\s"
r"\|\sRelevant Clinical Trials=(\d+))?\s\|\sCurrent Evidence=(\d+)"
r"Status: Paper Count=(\d+) \| Relevant Papers=(\d+)(?:\s\|\sClinical Trial"
r" Count=(\d+)\s\|\sRelevant Clinical Trials=(\d+))?\s\|\sCurrent Evidence=(\d+)"
)


Expand Down
10 changes: 5 additions & 5 deletions paperqa/agents/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,8 @@ async def gather_evidence(self, question: str, state: EnvironmentState) -> str:
)

return (
f"Added {l1 - l0} pieces of evidence, {l1_relevant - l0_relevant} of which were"
f" relevant.{best_evidence}\n\n" + status
f"Added {l1 - l0} pieces of evidence, {l1_relevant - l0_relevant} of which"
f" were relevant.{best_evidence}\n\n" + status
)


Expand Down Expand Up @@ -649,9 +649,9 @@ async def clinical_trials_search(self, query: str, state: EnvironmentState) -> s
self.previous_searches[query] += self.search_count
if error_message is None:
return (
f"Found clinical trial search results from search {offset} to {offset + new_result_count}"
f" among {total_result_count} total results."
f" {state.status}"
f"Found clinical trial search results from search {offset} to"
f" {offset + new_result_count} among {total_result_count} total"
f" results. {state.status}"
)
return f"Error in clinical trial query syntax: {error_message}"

Expand Down
8 changes: 6 additions & 2 deletions paperqa/llms.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,10 @@ async def similarity_search(
class QdrantVectorStore(VectorStore):
client: Any = Field(
default=None,
description="Instance of `qdrant_client.AsyncQdrantClient`. Defaults to an in-memory instance.",
description=(
"Instance of `qdrant_client.AsyncQdrantClient`. Defaults to an in-memory"
" instance."
),
)
collection_name: str = Field(default_factory=lambda: f"paper-qa-{uuid.uuid4().hex}")
vector_name: str | None = Field(default=None)
Expand Down Expand Up @@ -329,7 +332,8 @@ def validate_client(self):

if self.client and not isinstance(self.client, AsyncQdrantClient):
raise TypeError(
f"'client' should be an instance of AsyncQdrantClient. Got `{type(self.client)}`"
"'client' should be an instance of AsyncQdrantClient. Got"
f" `{type(self.client)}`"
)

if not self.client:
Expand Down
5 changes: 3 additions & 2 deletions paperqa/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,8 +484,9 @@ class AgentSettings(BaseModel):
agent_evidence_n: int = Field(
default=1,
ge=1,
description="Top n ranked evidences shown to the "
"agent after the GatherEvidence tool.",
description=(
"Top n ranked evidences shown to the agent after the GatherEvidence tool."
),
)
timeout: float = Field(
default=500.0,
Expand Down
15 changes: 12 additions & 3 deletions paperqa/sources/clinical_trials.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,13 +191,19 @@ def parse_clinical_trial(json_data: dict[str, Any]) -> str:
"=" * 25,
f"NCT Number: {identification.get('nctId', 'Not provided')}",
f"Title: {identification.get('briefTitle', 'Not provided')}",
f"Organization: {identification.get('organization', {}).get('fullName', 'Not provided')}",
(
"Organization:"
f" {identification.get('organization', {}).get('fullName', 'Not provided')}"
),
# Status Information
"\nSTUDY STATUS",
"=" * 13,
f"Overall Status: {status.get('overallStatus', 'Not provided')}",
f"Start Date: {status.get('startDateStruct', {}).get('date', 'Not provided')}",
f"Completion Date: {status.get('completionDateStruct', {}).get('date', 'Not provided')}",
(
"Completion Date:"
f" {status.get('completionDateStruct', {}).get('date', 'Not provided')}"
),
# Study Description
"\nSTUDY DESCRIPTION",
"=" * 17,
Expand All @@ -207,7 +213,10 @@ def parse_clinical_trial(json_data: dict[str, Any]) -> str:
"=" * 13,
f"Study Type: {design.get('studyType', 'Not provided')}",
f"Phase: {', '.join(design.get('phases', ['Not provided']))}",
f"Enrollment: {design.get('enrollmentInfo', {}).get('count', 'Not provided')} participants",
(
"Enrollment:"
f" {design.get('enrollmentInfo', {}).get('count', 'Not provided')} participants"
),
# Eligibility
"\nELIGIBILITY CRITERIA",
"=" * 19,
Expand Down
3 changes: 2 additions & 1 deletion paperqa/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,8 @@ def filter_content_for_user(self) -> None:
class Answer(PQASession):
def __init__(self, *args, **kwargs):
warnings.warn(
"The 'Answer' class is deprecated and will be removed in future versions. Use 'PQASession' instead.",
"The 'Answer' class is deprecated and will be removed in future versions."
" Use 'PQASession' instead.",
DeprecationWarning,
stacklevel=2,
)
Expand Down
8 changes: 6 additions & 2 deletions paperqa/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,9 @@ def extract_thought(content: str | None) -> str:
"dataset": "misc", # No direct equivalent, so 'misc' is used
"component": "misc", # No direct equivalent, so 'misc' is used
"report": "techreport",
"report-series": "techreport", # 'series' implies multiple tech reports, but each is still a 'techreport'
"report-series": ( # 'series' implies multiple tech reports, but each is still a 'techreport'
"techreport"
),
"standard": "misc", # No direct equivalent, so 'misc' is used
"standard-series": "misc", # No direct equivalent, so 'misc' is used
"edited-book": "book", # Edited books are considered books in BibTeX
Expand All @@ -514,7 +516,9 @@ def extract_thought(content: str | None) -> str:
"book-section": "inbook", # Sections in books can be considered as 'inbook'
"book-part": "inbook", # Parts of books can be considered as 'inbook'
"book-track": "inbook", # Tracks in books can be considered as 'inbook'
"reference-entry": "inbook", # Entries in reference books can be considered as 'inbook'
"reference-entry": ( # Entries in reference books can be considered as 'inbook'
"inbook"
),
"dissertation": "phdthesis", # Dissertations are usually PhD theses
"posted-content": "misc", # No direct equivalent, so 'misc' is used
"peer-review": "misc", # No direct equivalent, so 'misc' is used
Expand Down
8 changes: 5 additions & 3 deletions tests/test_clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,8 +376,9 @@ async def test_s2_only_fields_filtering() -> None:
assert s2_details
assert s2_details.authors, "Authors should be populated"
assert set(s2_details.other["client_source"]) == {"semantic_scholar"}
assert s2_details.citation == (
"Andrés M Bran, Sam Cox, Oliver Schilter, Carlo Baldassari, Andrew D."
assert (
s2_details.citation
== "Andrés M Bran, Sam Cox, Oliver Schilter, Carlo Baldassari, Andrew D."
" White, and P. Schwaller. Augmenting large language models with chemistry"
" tools. ArXiv, Unknown year. URL:"
" https://doi.org/10.48550/arxiv.2304.05376,"
Expand Down Expand Up @@ -429,7 +430,8 @@ async def test_crossref_journalquality_fields_filtering() -> None:
)
nejm_crossref_details = await crossref_client.query(
title=(
"Beta-Blocker Interruption or Continuation after Myocardial Infarction" # codespell:ignore
"Beta-Blocker Interruption or Continuation after Myocardial"
" Infarction" # codespell:ignore
),
fields=["title", "doi", "authors", "journal"],
)
Expand Down
7 changes: 4 additions & 3 deletions tests/test_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def test_matches_filter_criteria(doc_class, doc_data, filter_criteria, expected_
def test_citation_prompt_current_year():
expected_year_text = f"the current year is {get_year()}"

assert (
expected_year_text in citation_prompt
), f"Citation prompt should contain '{expected_year_text}' but got: {citation_prompt}"
assert expected_year_text in citation_prompt, (
f"Citation prompt should contain '{expected_year_text}' but got:"
f" {citation_prompt}"
)
Loading

0 comments on commit 1bde36c

Please sign in to comment.