Skip to content

Commit 114b59f

Browse files
Merge pull request #136 from gomate-community/pipeline
Pipeline
2 parents 2414137 + e85b23d commit 114b59f

29 files changed

+136
-32
lines changed

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,5 +31,8 @@ examples/download/models
3131
.gradio
3232
examples/datasets/arxiv/papers
3333
examples/projects/arxiv/papers
34-
trustrag/modules/deepsearch/.env
34+
trustrag/modules/deepresearch/.env
3535
*.env
36+
examples/deep-research
37+
examples/deep-research/local-deep-research
38+
trustrag.egg-info

.python-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.12

README_zh.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ app后台日志:
376376
>本项目感谢以下开源项目的支持与贡献:
377377
- 文档解析:[infiniflow/ragflow](https://github.com/infiniflow/ragflow/blob/main/deepdoc/README.md)
378378
- PDF文件解析:[opendatalab/MinerU](https://github.com/opendatalab/MinerU)
379-
- 深度搜索:[deep-research](https://github.com/dzhng/deep-research)|[deep-research-py](https://github.com/epuerta9/deep-research-py)
379+
- 深度搜索:[deep-research](https://github.com/dzhng/deep-research)|[deep-research-py](https://github.com/epuerta9/deep-research-py)|[deep-research](https://github.com/shibing624/deep-research/tree/main#/)
380380

381381
## 👉 Citation
382382
```text

examples/parsers/textparser_exmaple.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,18 @@
1010
@description: coding..
1111
"""
1212
from trustrag.modules.document.txt_parser import TextParser
13-
14-
13+
from trustrag.modules.document.chunk import TextChunker
14+
from trustrag.modules.chunks.sentence_chunk import SentenceChunker
1515

1616

1717
if __name__ == '__main__':
1818
text_parser=TextParser()
19-
chunks = text_parser.parse(fnm="../../data/docs/sample.txt")
19+
tc=TextChunker()
20+
sc=SentenceChunker(chunk_size=512)
21+
paragraphs = text_parser.parse(fnm="../../data/docs/1737765690374-穷查理宝典.pdf-15a72b24-cc5c-4a4e-ae9e-7514e0d9be02.txt")
22+
print(len(paragraphs))
23+
chunks=tc.get_chunks(paragraphs,chunk_size=128)
24+
# chunks=sc.get_chunks(paragraphs)
2025
print(len(chunks))
21-
22-
for chunk in chunks:
23-
print("=="*100)
24-
print(chunk)
26+
# for chunk in chunks:
27+
# print(len(chunk))

examples/rag/rag_readme.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import pickle
2+
import pandas as pd
3+
from tqdm import tqdm
4+
5+
from trustrag.modules.document.chunk import TextChunker
6+
from trustrag.modules.document.txt_parser import TextParser
7+
from trustrag.modules.document.utils import PROJECT_BASE
8+
from trustrag.modules.generator.llm import GLM4Chat
9+
from trustrag.modules.reranker.bge_reranker import BgeRerankerConfig, BgeReranker
10+
from trustrag.modules.retrieval.bm25s_retriever import BM25RetrieverConfig
11+
from trustrag.modules.retrieval.dense_retriever import DenseRetrieverConfig
12+
from trustrag.modules.retrieval.hybrid_retriever import HybridRetriever, HybridRetrieverConfig

main.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
def main():
2+
print("Hello from trustrag!")
3+
4+
5+
if __name__ == "__main__":
6+
main()

pyproject.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[project]
2+
name = "trustrag"
3+
version = "0.1.0"
4+
description = "Add your description here"
5+
readme = "README.md"
6+
requires-python = ">=3.12"
7+
dependencies = []

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
install_requires=required,
1919
author_email="yanqiang@ict.ac.cn",
2020
description="RAG Framework within Reliable input,Trusted output",
21-
long_description=open('README.md').read(),
21+
long_description=open('README.md','r',encoding="utf-8").read(),
2222
long_description_content_type='text/markdown',
2323
url="https://github.com/gomate-community/TrustRAG",
2424
python_requires='>=3.9',
File renamed without changes.

0 commit comments

Comments
 (0)