Skip to content

Commit

Permalink
remove unpublished documents
Browse files (browse the repository at this point in the history)
  • Loading branch information
JosuaKrause committed Aug 28, 2024
1 parent cfdd76b commit 852499c
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions app/system/autotag/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
get_tags_for_main_id,
is_ready,
is_updating_tag_group,
remove_tag_member,
write_tag,
)
from app.system.autotag.platform import fill_in_everything, process_main_ids
Expand Down Expand Up @@ -212,6 +213,7 @@ def tagger_compute(entry: TaggerPayload) -> str:
process_queue_redis=process_queue_redis,
graph_tags=graph_tags,
get_full_text=get_full_text,
doc_is_remove=doc_is_remove,
process_enqueue=process_enqueue)
if entry["stage"] == "cluster":
return tagger_cluster(
Expand Down Expand Up @@ -407,6 +409,7 @@ def tagger_tag(
process_queue_redis: Redis,
graph_tags: GraphProfile,
get_full_text: FullTextFn,
doc_is_remove: IsRemoveFn,
process_enqueue: ProcessEnqueue[TaggerPayload]) -> str:
"""
Computes the auto-tags for pending documents.
Expand All @@ -417,6 +420,8 @@ def tagger_tag(
graph_tags (GraphProfile): Model for extracting document keywords.
get_full_text (FullTextFn): Gets the full text of a document
(via main id).
doc_is_remove (IsRemoveFn): Checks whether a document (via main id) has
been removed; returns the flag together with an optional error.
process_enqueue (ProcessEnqueue[TaggerPayload]): Enqueues the next
step.
Expand All @@ -434,6 +439,13 @@ def tagger_tag(
for elem in list(get_incomplete(session)):
main_id = elem["main_id"]
tag_group = elem["tag_group"]
is_remove, error_remove = doc_is_remove(main_id)
if error_remove is not None:
errors.append(error_remove)
continue
if is_remove:
remove_tag_member(session, tag_group, main_id)
continue
keywords, error = tag_doc(
main_id,
graph_tags=graph_tags,
Expand Down

0 comments on commit 852499c

Please sign in to comment.