You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
This is in the `feedreader.py` file that processes the data:
def update_articles_from_feeds(self):
    """Create or refresh an article for every entry of every RSS source.

    Iterates the ``Sources`` rows with ``method='rss'`` and
    ``source_for='science paper'``, downloads each feed through
    ``self.fetch_feed`` and hands every entry to ``_import_feed_entry``.
    """
    sources = Sources.objects.filter(method='rss', source_for='science paper')
    for source in sources:
        feed = self.fetch_feed(source.link, source.ignore_ssl)
        for entry in feed['entries']:
            self._import_feed_entry(source, entry)


def _import_feed_entry(self, source, entry):
    """Create or update the article described by one feed entry.

    Args:
        source: the ``Sources`` row the feed was fetched from.
        entry: one item of ``feed['entries']``; read with both ``[]``/``.get``
            and ``hasattr``, as the original code did.
    """
    title = entry['title']
    self.stdout.write(f"Processing {title}")

    # Pick the richest summary the feed offers; later sources win.
    summary = entry.get('summary', '')
    if hasattr(entry, 'summary_detail'):
        summary = entry['summary_detail']['value']
    if 'pubmed' in source.link and hasattr(entry, 'content'):
        summary = entry['content'][0]['value']

    # An entry without any date cannot be parsed; skip it instead of letting
    # parse(None) abort the whole import run.
    raw_date = entry.get('published') or entry.get('prism_coverdate')
    if not raw_date:
        self.stdout.write(f"No publication date found for {title}, skipping this entry.")
        return
    published_date = parse(raw_date, tzinfos=self.tzinfos).astimezone(pytz.utc)

    # NOTE(review): presumably strips utm_* tracking parameters - confirm in greg.
    link = greg.remove_utm(entry['link'])
    doi = self._extract_doi(source, entry)

    crossref_paper = None
    if doi:
        crossref_paper = SciencePaper(doi=doi)
        crossref_paper.refresh()
        # Prefer Crossref metadata, but keep what the feed gave us when
        # Crossref has nothing (previously the chosen summary was replaced
        # by entry.get('summary'), which may be missing altogether).
        title = crossref_paper.title or title
        summary = crossref_paper.abstract or summary

        # An article counts as already known if either DOI or title matches.
        science_paper = Articles.objects.filter(Q(doi=doi) | Q(title=title)).first()
        created = science_paper is None
        if created:
            science_paper = Articles.objects.create(
                doi=doi,
                title=title,
                summary=summary,
                link=link,
                published_date=published_date,
                container_title=crossref_paper.journal,
                publisher=crossref_paper.publisher,
                access=crossref_paper.access,
                crossref_check=timezone.now(),
            )
    else:
        self.stdout.write('no DOI, trying to create article')
        science_paper = Articles.objects.filter(title=title).first()
        created = science_paper is None
        if created:
            # NOTE(review): this passes `source=` while every other path uses
            # the `sources` relation - confirm Articles still has a `source`
            # field, otherwise this call raises TypeError.
            science_paper = Articles.objects.create(
                title=title,
                summary=summary,
                link=link,
                published_date=published_date,
                source=source,
                crossref_check=None,
            )

    if not created:
        self._apply_feed_changes(science_paper, title, summary, link, published_date)

    # Always link the article to this source's team/subject/source, for new
    # and existing articles alike; adding an existing relation is a no-op.
    science_paper.teams.add(source.team)
    science_paper.subjects.add(source.subject)
    science_paper.sources.add(source)
    science_paper.save()

    if crossref_paper is not None:
        self._link_authors(science_paper, crossref_paper)


def _extract_doi(self, source, entry):
    """Return the DOI carried by *entry*, or a falsy value when there is none.

    Only PubMed feeds (``dc_identifier`` prefixed with ``doi:``) and FASEB
    feeds (``prism_doi``) are inspected, selected by substring of
    ``source.link``.
    """
    if 'pubmed' in source.link and entry.get('dc_identifier', '').startswith('doi:'):
        return entry['dc_identifier'].replace('doi:', '')
    if 'faseb' in source.link:
        return entry.get('prism_doi', '')
    return None


def _apply_feed_changes(self, science_paper, title, summary, link, published_date):
    """Copy feed values onto an existing article when any of them differ.

    Does not save; the caller saves once after linking relations.
    """
    cleaned_summary = SciencePaper.clean_abstract(abstract=summary)
    changed = (
        science_paper.title != title
        or science_paper.summary != cleaned_summary
        or science_paper.link != link
        or science_paper.published_date != published_date
    )
    if changed:
        science_paper.title = title
        science_paper.summary = cleaned_summary
        science_paper.link = link
        science_paper.published_date = published_date


def _link_authors(self, science_paper, crossref_paper):
    """Attach every author listed on *crossref_paper* to *science_paper*.

    Authors are looked up (or created) by ORCID when one is given, otherwise
    by given + family name. Authors with neither are skipped.
    """
    for author_info in crossref_paper.authors or []:
        given_name = author_info.get('given')
        family_name = author_info.get('family')
        orcid = author_info.get('ORCID')

        if not orcid and not (given_name and family_name):
            self.stdout.write(f"Missing given name or family name, skipping this author. {crossref_paper.doi}")
            continue

        try:
            if orcid:
                # ORCID is the primary lookup key when present.
                author_obj, _ = Authors.objects.get_or_create(
                    ORCID=orcid,
                    defaults={
                        'given_name': given_name,
                        'family_name': family_name,
                    },
                )
            else:
                author_obj, _ = Authors.objects.get_or_create(
                    given_name=given_name,
                    family_name=family_name,
                    defaults={'ORCID': orcid},  # falsy here (None or '')
                )
        except MultipleObjectsReturned:
            authors = Authors.objects.filter(given_name=given_name, family_name=family_name)
            self.stdout.write(f"Multiple authors found for {given_name} {family_name}:")
            for author in authors:
                self.stdout.write(f"Author ID: {author.author_id}, ORCID: {author.ORCID}")
            # Prefer the first duplicate that has an ORCID, else the first one.
            author_obj = next((author for author in authors if author.ORCID), authors.first())

        if author_obj is None:
            # Duplicates were reported but none matched by name (e.g. the
            # collision was on ORCID); nothing safe to link.
            self.stdout.write(f"Could not resolve author {given_name} {family_name}, skipping. {crossref_paper.doi}")
            continue

        if not science_paper.authors.filter(pk=author_obj.pk).exists():
            science_paper.authors.add(author_obj)
The text was updated successfully, but these errors were encountered:
rss feed where the problem was found: https://pubmed.ncbi.nlm.nih.gov/rss/search/10guX6I3SqrbUeeLKSTD6FCRM44ewnrN2MKKTQLLPMHB4xNsZU/?limit=15&utm_campaign=pubmed-2&fc=20210216052009
this is in the feedreader.py file that processes the data
The text was updated successfully, but these errors were encountered: