Skip to content

Commit

Permalink
initial work before i go home
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Dec 19, 2024
1 parent 5737396 commit 88eaacf
Show file tree
Hide file tree
Showing 12 changed files with 608 additions and 29 deletions.
522 changes: 522 additions & 0 deletions scripts/download_files.py

Large diffs are not rendered by default.

8 changes: 6 additions & 2 deletions server/lib/genome/importers/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,15 @@ def default_filename
'claims'
end

# Base directory that importer data files are looked up in.
#
# Honors the WAGS_TAILS_DIR environment variable when it is set to a
# non-empty value (a trailing slash is stripped so callers can keep appending
# "/<source>/..."). Otherwise falls back to the per-user default location.
#
# NOTE(review): wags-tails also consults the XDG data variables; only the
# explicit WAGS_TAILS_DIR override is mirrored here — confirm whether the XDG
# lookups are needed as well.
#
# @return [String] directory path without a trailing slash
def default_data_dir
  configured = ENV['WAGS_TAILS_DIR']
  return configured.chomp('/') unless configured.nil? || configured.empty?

  "#{Dir.home}/.local/share/wags_tails"
end

# Resolves the data file an importer should read.
#
# An explicitly supplied path is returned untouched. When +file_path+ is nil
# the default is built as
#   <default_data_dir>/<source>/<source>_<default_filename>.<default_filetype>
# where <source> is the underscored name of the module enclosing the importer
# class (the second-to-last segment of the class name).
#
# @param file_path [String, nil] caller-supplied path, or nil for the default
# @return [String] path of the file to import
def handle_file_location(file_path)
  return file_path unless file_path.nil?

  src_name = self.class.name.split('::')[-2].underscore
  "#{default_data_dir}/#{src_name}/#{src_name}_#{default_filename}.#{default_filetype}"
end

def remove_existing_source
Expand Down
9 changes: 4 additions & 5 deletions server/lib/genome/importers/file_importers/chembl.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,11 @@ def initialize(file_path)
@source_db_name = "ChEMBL"
end

# Returns the file type string for this importer's default data file ('db').
# NOTE(review): presumably consumed as the file extension when a default path
# is built — confirm against the base importer.
def default_filetype
'db'
end
# Resolves the ChEMBL database file to import.
#
# An explicitly supplied path is returned untouched. Otherwise the
# `chembl_<number>.db` file with the highest number found under
# <default_data_dir>/chembl/ is chosen.
#
# @param file_path [String, nil] caller-supplied path, or nil to auto-detect
# @return [String, nil] chosen path; nil when no matching file exists
def handle_file_location(file_path)
  return file_path unless file_path.nil?

  directory = "#{default_data_dir}/chembl/"
  Dir.glob(File.join(directory, 'chembl_*.db')).max_by do |file|
    # String#[] yields nil for names without a numeric suffix and nil.to_i is
    # 0, so such files rank lowest without a blanket inline `rescue`.
    File.basename(file)[/chembl_(\d+)\.db/, 1].to_i
  end
end

def create_claims
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ def create_new_source
@source.save
end

# Resolves the Clearity Foundation biomarkers claims file.
#
# An explicitly supplied path is returned untouched; otherwise the fixed
# default file under the shared data directory is used. The directory comes
# from default_data_dir rather than a second hand-written copy of that path.
#
# @param file_path [String, nil] caller-supplied path, or nil for the default
# @return [String] path of the TSV file to import
def handle_file_location(file_path)
  return file_path unless file_path.nil?

  "#{default_data_dir}/clearity_foundation/clearity_foundation_biomarkers_claims.tsv"
end

def create_interaction_claims
CSV.foreach(file_path, headers: true, col_sep: "\t") do |row|
gene_claim = create_gene_claim(row['gene_name'].upcase)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ def create_new_source
@source.save
end

# Resolves the Clearity Foundation clinical trial claims file.
#
# An explicitly supplied path is returned untouched; otherwise the fixed
# default file under the shared data directory is used. The directory comes
# from default_data_dir rather than a second hand-written copy of that path.
#
# @param file_path [String, nil] caller-supplied path, or nil for the default
# @return [String] path of the TSV file to import
def handle_file_location(file_path)
  return file_path unless file_path.nil?

  "#{default_data_dir}/clearity_foundation/clearity_foundation_clinical_trial_claims.tsv"
end

def create_interaction_claims
CSV.foreach(file_path, headers: true, col_sep: "\t") do |row|
next if row['Entrez Gene Name'] == 'N/A' || row['Pubchem name'] == 'N/A'
Expand Down
12 changes: 6 additions & 6 deletions server/lib/genome/importers/file_importers/docm.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class Importer < Genome::Importers::Base

def initialize(tsv_root_path)
@tsv_root = if tsv_root_path.nil?
'lib/data/docm/'
"#{default_data_dir}/docm/"
else
tsv_root_path
end
Expand Down Expand Up @@ -49,37 +49,37 @@ def create_new_source


# Loads drug claims from `docm_drug_claims.csv` under @tsv_root.
#
# The first column of every data row is passed to create_drug_claim, and the
# result is stored in @drug_claims under that same column value.
def create_drug_claims
  # File.join works whether or not @tsv_root ends with a slash.
  CSV.foreach(File.join(@tsv_root, 'docm_drug_claims.csv'), headers: true, col_sep: ',') do |row|
    drug_name = row[0]
    @drug_claims[drug_name] = create_drug_claim(drug_name)
  end
end

# Loads gene claims from `docm_gene_claims.csv` under @tsv_root.
#
# The first column of every data row is passed to create_gene_claim together
# with GeneNomenclature::NCBI_NAME, and the result is stored in @gene_claims
# under that same column value.
def create_gene_claims
  # File.join works whether or not @tsv_root ends with a slash.
  CSV.foreach(File.join(@tsv_root, 'docm_gene_claims.csv'), headers: true, col_sep: ',') do |row|
    gene_name = row[0]
    @gene_claims[gene_name] = create_gene_claim(gene_name, GeneNomenclature::NCBI_NAME)
  end
end

def create_interaction_claims
CSV.foreach("#{@tsv_root}interaction_claim.csv", headers: true, col_sep: ',') do |row|
CSV.foreach("#{@tsv_root}docm_interaction_claims.csv", headers: true, col_sep: ',') do |row|
gc = @gene_claims[row[1]]
dc = @drug_claims[row[0]]
next if gc.nil? || dc.nil?

ic = create_interaction_claim(gc, dc)
@interaction_claims[[gc, dc]] = ic
end
CSV.foreach("#{@tsv_root}interaction_claim_attributes.csv", headers: true, col_sep: ',') do |row|
CSV.foreach("#{@tsv_root}docm_interaction_claim_attributes.csv", headers: true, col_sep: ',') do |row|
gc = @gene_claims[row[3]]
dc = @drug_claims[row[2]]
next if gc.nil? || dc.nil?

ic = @interaction_claims[[gc, dc]]
create_interaction_claim_attribute(ic, row[0], row[1])
end
CSV.foreach("#{@tsv_root}interaction_claim_publications.csv", headers: true, col_sep: ',') do |row|
CSV.foreach("#{@tsv_root}docm_interaction_claim_publications.csv", headers: true, col_sep: ',') do |row|
gc = @gene_claims[row[3]]
dc = @drug_claims[row[2]]
next if gc.nil? || dc.nil?
Expand Down
11 changes: 9 additions & 2 deletions server/lib/genome/importers/file_importers/drugbank.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,15 @@ def initialize(file_path)
@source_db_name = 'DrugBank'
end

def default_filetype
'xml'
# Resolves the DrugBank XML file to import.
#
# A caller-supplied path wins. Otherwise every `drugbank_*.xml` under
# <default_data_dir>/drugbank/ is ranked by the three numeric components of a
# `drugbank_<a>.<b>.<c>.xml` name and the highest one is returned; names that
# do not follow that pattern rank as 0.0.0.
#
# @param file_path [String, nil] caller-supplied path, or nil to auto-detect
# @return [String, nil] chosen path; nil when no matching file exists
def handle_file_location(file_path)
  return file_path unless file_path.nil?

  version_pattern = /drugbank_(\d+)\.(\d+)\.(\d+)\.xml/
  candidates = Dir.glob(File.join("#{default_data_dir}/drugbank/", 'drugbank_*.xml'))
  candidates.max_by do |candidate|
    parsed = version_pattern.match(candidate)
    parsed.nil? ? [0, 0, 0] : parsed.captures.map(&:to_i)
  end
end

def run_parser
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
include ActionView::Helpers::SanitizeHelper

module Genome; module Importers; module FileImporters; module GuideToPharmacology;
# gene_file_path should point to `targets_and_families.csv`
# interaction_file_path should point to `interactions.csv`
# gene_file_path should point to `gtop_targets_and_families_*.tsv`
# interaction_file_path should point to `gtop_interactions_*.tsv`
class Importer < Genome::Importers::Base
attr_reader :interaction_file_path, :gene_file_path, :target_to_entrez

# Sets up the importer with its two input files.
#
# Each path is passed through handle_gtop_file_location together with the
# datatype it represents ('interactions' / 'targets_and_families') and the
# returned value is stored; the raw arguments are no longer assigned first
# only to be overwritten.
#
# @param interaction_file_path [String, nil] interactions file path
# @param gene_file_path [String, nil] targets-and-families file path
def initialize(interaction_file_path, gene_file_path)
  @interaction_file_path = handle_gtop_file_location(interaction_file_path, 'interactions')
  @gene_file_path = handle_gtop_file_location(gene_file_path, 'targets_and_families')
  @target_to_entrez = {}
  @source_db_name = 'GuideToPharmacology'
end
Expand All @@ -20,6 +20,16 @@ def create_claims

private

# Resolves one of the Guide to Pharmacology input files.
#
# A caller-supplied path is returned untouched. Otherwise the
# `gtop_<datatype>_<version>.tsv` file with the highest version found under
# <default_data_dir>/guidetopharmacology/ is chosen. The version is compared
# numerically, component by component, so "2024.10" ranks above "2024.2".
# NOTE(review): assumes versions are digits optionally separated by dots —
# confirm against the downloader's naming; other names rank lowest.
#
# @param file_path [String, nil] caller-supplied path, or nil to auto-detect
# @param datatype [String] 'targets_and_families' or 'interactions'
# @return [String, nil] chosen path; nil when no matching file exists
# @raise [RuntimeError] if +datatype+ is not recognized (auto-detect only)
def handle_gtop_file_location(file_path, datatype)
  return file_path unless file_path.nil?

  raise "Unrecognized GtoP datatype: #{datatype}" unless %w[targets_and_families interactions].include?(datatype)

  directory = "#{default_data_dir}/guidetopharmacology/"
  # The extension must match the glob (.tsv); matching on `.db` never
  # succeeded, which made every candidate rank equally.
  version_pattern = /\Agtop_#{Regexp.escape(datatype)}_(\d+(?:\.\d+)*)\.tsv\z/
  Dir.glob(File.join(directory, "gtop_#{datatype}_*.tsv")).max_by do |file|
    version = File.basename(file)[version_pattern, 1]
    version ? version.split('.').map(&:to_i) : [0]
  end
end

def get_version
version = ''
File.open(@interaction_file_path, 'r') do |file|
Expand Down Expand Up @@ -57,7 +67,7 @@ def create_new_source
def import_gene_claims
refseq_id_pattern = /^((AC|AP|NC|NG|NM|NP|NR|NT|NW|XM|XP|XR|YP|ZP)_\d+|(NZ\_[A-Z]{4}\d+))(\.\d+)?$/

CSV.foreach(gene_file_path, headers: true, skip_lines: /GtoPdb Version/) do |line|
CSV.foreach(gene_file_path, headers: true, skip_lines: /GtoPdb Version/, col_sep: "\t") do |line|
gene_lui = line['Human Entrez Gene']
next if blank?(gene_lui) || gene_lui.include?('|')

Expand Down Expand Up @@ -111,7 +121,7 @@ def import_gene_claims
end

def import_interaction_claims
CSV.foreach(interaction_file_path, headers: true, skip_lines: /GtoPdb Version/) do |line|
CSV.foreach(interaction_file_path, headers: true, skip_lines: /GtoPdb Version/, col_sep: "\t") do |line|
next unless valid_interaction_line?(line)

gene_claim = create_gene_claim("NCBIGENE:#{line['Target ID']}", GeneNomenclature::NCBI_ID)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,17 @@ def create_claims
end

private
# Returns the base name component ('clinical_trial_claims') that this
# importer's handle_file_location inserts into the default data file name.
def default_filename
'clinical_trial_claims'
end

# Picks the file this importer reads.
#
# A non-nil +file_path+ is handed back as-is. For nil, the default location is
# <default_data_dir>/my_cancer_genome/my_cancer_genome_<default_filename>.<default_filetype>
#
# @param file_path [String, nil] caller-supplied path, or nil for the default
# @return [String] path of the file to import
def handle_file_location(file_path)
  if file_path.nil?
    source_dir = 'my_cancer_genome'
    basename = "#{source_dir}_#{default_filename}.#{default_filetype}"
    [default_data_dir, source_dir, basename].join('/')
  else
    file_path
  end
end

def create_new_source
@source ||= Source.create(
{
Expand Down
10 changes: 5 additions & 5 deletions server/lib/genome/importers/file_importers/oncokb.rb
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
module Genome; module Importers; module FileImporters; module Oncokb;
class Importer < Genome::Importers::Base
def initialize(tsv_root_path)
if tsv_root_path.nil?
@tsv_root = 'lib/data/oncokb/'
else
@tsv_root = tsv_root_path
end
@tsv_root = if tsv_root_path.nil?
"#{default_data_dir}/oncokb/"
else
tsv_root_path
end
@source_db_name = 'OncoKB'
@drug_claims = {}
@gene_claims = {}
Expand Down
16 changes: 15 additions & 1 deletion server/lib/genome/importers/file_importers/pharmgkb.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,20 @@ def create_claims

private

# Resolves the PharmGKB TSV file to import.
#
# A caller-supplied path is returned untouched. Otherwise the
# `pharmgkb_*.tsv` file under <default_data_dir>/pharmgkb/ whose name carries
# the highest 8-digit stamp is chosen (the same 8-digit run get_version reads).
#
# @param file_path [String, nil] caller-supplied path, or nil to auto-detect
# @return [String, nil] chosen path; nil when no matching file exists
def handle_file_location(file_path)
  return file_path unless file_path.nil?

  directory = "#{default_data_dir}/pharmgkb/"
  Dir.glob(File.join(directory, 'pharmgkb_*.tsv')).max_by do |file|
    # Rank on the file name only so digits in parent directories cannot
    # interfere; names without an 8-digit run rank as 0.
    File.basename(file)[/\d{8}/].to_i
  end
end


# Derives the source version from @file_path.
#
# The first run of 8 digits in the file name is preferred, so that digits in
# a parent directory cannot shadow the stamp in the name itself; if the name
# has none, the whole path is searched as before.
#
# @return [String, nil] the 8-digit string, or nil when none is present or
#   @file_path is nil
def get_version
  path = @file_path.to_s
  File.basename(path)[/\d{8}/] || path[/\d{8}/]
end


def create_new_source
@source ||= Source.create(
{
Expand All @@ -23,7 +37,7 @@ def create_new_source
pmid: '34216021',
pmcid: 'PMC8457105',
doi: '10.1002/cpt.2350',
source_db_version: '2024-04-05', # using static file, see issue #420
source_db_version: get_version,
source_db_name: source_db_name,
full_name: 'PharmGKB - The Pharmacogenomics Knowledgebase',
license: License::CC_BY_SA_4_0,
Expand Down
4 changes: 2 additions & 2 deletions server/lib/tasks/importers.rake
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ namespace :dgidb do
# Guide to Pharmacology is a special case because it needs two input files
def run_gtop_import(args)
args.with_defaults(
interaction_file_path: 'lib/data/guide_to_pharmacology/interactions.csv',
gene_file_path: 'lib/data/guide_to_pharmacology/targets_and_families.csv',
interaction_file_path: nil,
gene_file_path: nil,
gene_group: false,
drug_group: false
)
Expand Down

0 comments on commit 88eaacf

Please sign in to comment.