From ef9dda27f905702d2cc427ab28c0488401bffa61 Mon Sep 17 00:00:00 2001 From: Nick Budak Date: Mon, 27 Mar 2023 14:56:59 -0700 Subject: [PATCH 1/3] Avoid nesting cloned OGM repos inside themselves Providing both `directory` and `path` to Git.clone results in the repo being cloned into a directory nested inside itself. This may be a fix for #144. --- lib/geo_combine/harvester.rb | 2 +- spec/lib/geo_combine/harvester_spec.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/geo_combine/harvester.rb b/lib/geo_combine/harvester.rb index ab68524..92737f1 100644 --- a/lib/geo_combine/harvester.rb +++ b/lib/geo_combine/harvester.rb @@ -73,7 +73,7 @@ def clone(repo = nil) return 0 if File.directory? repo_path repo_url = "https://github.com/OpenGeoMetadata/#{repo}.git" - Git.clone(repo_url, repo, path: repo_path, depth: 1) && 1 + Git.clone(repo_url, nil, path: ogm_path, depth: 1) && 1 end private diff --git a/spec/lib/geo_combine/harvester_spec.rb b/spec/lib/geo_combine/harvester_spec.rb index 6525900..5951cb7 100644 --- a/spec/lib/geo_combine/harvester_spec.rb +++ b/spec/lib/geo_combine/harvester_spec.rb @@ -75,9 +75,9 @@ harvester.clone(repo_name) expect(Git).to have_received(:clone).with( repo_url, - repo_name, { + nil, { depth: 1, # shallow clone - path: repo_path + path: harvester.ogm_path } ) end From 3f4389e4d13283ea1a0144e3b2b5458566c64161 Mon Sep 17 00:00:00 2001 From: Huda Khan Date: Thu, 30 Mar 2023 17:07:26 -0400 Subject: [PATCH 2/3] removing recursion from clone and pull, creating clone_all and pull_all --- lib/geo_combine/harvester.rb | 26 +++++++++------ lib/tasks/geo_combine.rake | 4 +-- spec/lib/geo_combine/harvester_spec.rb | 44 ++++++++++++++------------ 3 files changed, 43 insertions(+), 31 deletions(-) diff --git a/lib/geo_combine/harvester.rb b/lib/geo_combine/harvester.rb index 92737f1..c634c56 100644 --- a/lib/geo_combine/harvester.rb +++ b/lib/geo_combine/harvester.rb @@ -53,22 +53,24 @@ def docs_to_index end end - # Update a repository via git, or all repositories if none specified. - # If the repository doesn't exist, clone it. Return the count of repositories updated. - def pull(repo = nil) - return repositories.map(&method(:pull)).reduce(:+) unless repo - + # Update a repository via git + # If the repository doesn't exist, clone it. + def pull(repo) repo_path = File.join(@ogm_path, repo) clone(repo) unless File.directory? repo_path Git.open(repo_path).pull && 1 end - # Clone a repository via git, or all repositories if none specified. - # If the repository already exists, skip it. Return the count of repositories cloned. - def clone(repo = nil) - return repositories.map(&method(:clone)).reduce(:+) unless repo + # Update all repositories + # Return the count of repositories updated + def pull_all + repositories.map(&method(:pull)).reduce(:+) + end + # Clone a repository via git + # If the repository already exists, skip it. + def clone(repo) repo_path = File.join(@ogm_path, repo) return 0 if File.directory? repo_path @@ -76,6 +78,12 @@ def clone(repo = nil) Git.clone(repo_url, nil, path: ogm_path, depth: 1) && 1 end + # Clone all repositories via git + # Return the count of repositories cloned. + def clone_all + repositories.map(&method(:clone)).reduce(:+) + end + private # List of repository names to harvest diff --git a/lib/tasks/geo_combine.rake b/lib/tasks/geo_combine.rake index 4c4f167..182d5d4 100644 --- a/lib/tasks/geo_combine.rake +++ b/lib/tasks/geo_combine.rake @@ -12,14 +12,14 @@ namespace :geocombine do desc 'Clone OpenGeoMetadata repositories' task :clone, [:repo] do |_t, args| harvester = GeoCombine::Harvester.new - total = harvester.clone(args.repo) + total = args[:repo] ? harvester.clone(args.repo) : harvester.clone_all puts "Cloned #{total} repositories" end desc '"git pull" OpenGeoMetadata repositories' task :pull, [:repo] do |_t, args| harvester = GeoCombine::Harvester.new - total = harvester.pull(args.repo) + total = args[:repo] ? harvester.pull(args.repo) : harvester.pull_all puts "Updated #{total} repositories" end diff --git a/spec/lib/geo_combine/harvester_spec.rb b/spec/lib/geo_combine/harvester_spec.rb index 5951cb7..7098c1b 100644 --- a/spec/lib/geo_combine/harvester_spec.rb +++ b/spec/lib/geo_combine/harvester_spec.rb @@ -49,24 +49,26 @@ expect(stub_repo).to have_received(:pull) end + it 'clones a repo before pulling if it does not exist' do + harvester.pull(repo_name) + expect(Git).to have_received(:clone) + end + end + + describe '#pull_all' do it 'can pull all repositories' do - harvester.pull + harvester.pull_all expect(Git).to have_received(:open).exactly(2).times expect(stub_repo).to have_received(:pull).exactly(2).times end - it 'skips repositories in the denylist' do - harvester.pull - expect(Git).not_to have_received(:open).with('https://github.com/OpenGeoMetadata/aardvark.git') - end - - it 'clones a repo before pulling if it does not exist' do - harvester.pull(repo_name) - expect(Git).to have_received(:clone) + it 'returns the count of repositories pulled' do + expect(harvester.pull_all).to eq(2) end - it 'returns the count of repositories pulled' do - expect(harvester.pull).to eq(2) + it 'skips repositories in the denylist' do + harvester.pull_all + expect(Git).not_to have_received(:open).with('https://github.com/OpenGeoMetadata/aardvark.git') end end @@ -82,24 +84,26 @@ ) end + it 'skips repositories that already exist' do + allow(File).to receive(:directory?).with(repo_path).and_return(true) + harvester.clone(repo_name) + expect(Git).not_to have_received(:clone) + end + end + + describe '#clone_all' do it 'can clone all repositories' do - harvester.clone + harvester.clone_all expect(Git).to have_received(:clone).exactly(2).times end it 'skips repositories in the denylist' do - harvester.pull + harvester.clone_all expect(Git).not_to have_received(:clone).with('https://github.com/OpenGeoMetadata/aardvark.git') end - it 'skips repositories that already exist' do - allow(File).to receive(:directory?).with(repo_path).and_return(true) - harvester.clone(repo_name) - expect(Git).not_to have_received(:clone) - end - it 'returns the count of repositories cloned' do - expect(harvester.clone).to eq(2) + expect(harvester.clone_all).to eq(2) end end end From b2f00e658a0adcf25e4e2c8672353f12c7f490c2 Mon Sep 17 00:00:00 2001 From: Huda Khan Date: Thu, 30 Mar 2023 17:09:51 -0400 Subject: [PATCH 3/3] trailing space deletion --- lib/geo_combine/harvester.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/geo_combine/harvester.rb b/lib/geo_combine/harvester.rb index c634c56..ad997d6 100644 --- a/lib/geo_combine/harvester.rb +++ b/lib/geo_combine/harvester.rb @@ -54,7 +54,7 @@ def docs_to_index end # Update a repository via git - # If the repository doesn't exist, clone it. + # If the repository doesn't exist, clone it. def pull(repo) repo_path = File.join(@ogm_path, repo) clone(repo) unless File.directory? repo_path