Skip to content

Commit 1691275

Browse files
Initial commit (#1)
1 parent 24f74fd commit 1691275

File tree

5 files changed

+253
-0
lines changed

5 files changed

+253
-0
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
log-*.txt
2+
metabase_data*.tgz

Gemfile

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# frozen_string_literal: true
2+
3+
source "https://rubygems.org"

gem "yaml", "~> 0.3.0"
gem "nanoid", "~> 2.0"
gem "slop", "~> 4.10"

Gemfile.lock

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
GEM
2+
remote: https://rubygems.org/
3+
specs:
4+
nanoid (2.0.0)
5+
slop (4.10.1)
6+
yaml (0.3.0)
7+
8+
PLATFORMS
9+
arm64-darwin-22
10+
ruby
11+
12+
DEPENDENCIES
13+
nanoid (~> 2.0)
14+
slop (~> 4.10)
15+
yaml (~> 0.3.0)
16+
17+
BUNDLED WITH
18+
2.5.3

README.md

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# About
2+
3+
This is a prototype to do deep copies of Metabase collections.
4+
5+
## Requirements
6+
* A Metabase instance with a Pro license
7+
* A collection to clone
8+
9+
## Installation
10+
11+
* Make sure you have Ruby installed
12+
* Install the dependencies with `bundle install`
13+
* Test if the dependencies are satisfied by running `ruby clone.rb --help`.
14+
15+
## Supported use cases and usage
16+
17+
1. Clone a collection (to import it within the origin Metabase instance), optionally updating the underlying data source
18+
2. Update the data source, to use this within the origin or in a different destination Metabase instance
19+
20+
## Limitations
21+
22+
It can only swap out one data source at a time. If the items in your collections depend on multiple data sources, you might have to do several runs. This scenario has not been tested yet.
23+
24+
## Example usage
25+
26+
1. Locate the ID of the collection to use as template. In this case, it's 9 and export it via serialization API:
27+
28+
```sh
29+
curl -H 'x-api-key: API_KEY' \
30+
-X POST 'http://localhost:3000/api/ee/serialization/export?settings=false&data_model=false&collection=9' \
31+
-o metabase_data.tgz
32+
```
33+
34+
2. Extract tarball
35+
36+
```sh
37+
tar xzf metabase_data.tgz
38+
```
39+
40+
3. Run this tool pointing to the directory extracted from the tarball, and providing the source and target names for the data source names.
41+
42+
If you are planning to load the result into the same instance, you will want to add the `--duplicate` parameter, otherwise you will override the source collection on import.
43+
44+
```sh
45+
ruby clone.rb -y foobar-2024-08-09_20-57 -s "DVD Rental" -t "DVD Rental 2" -n "Super fancy new collection" -d
46+
```
47+
4. Create a tarball with the modified files
48+
49+
```sh
50+
tar -czf metabase_data_modified.tgz foobar-2024-08-09_20-57
51+
```
52+
53+
5. Import the tarball via serialization API
54+
55+
```sh
56+
curl -X POST -H 'x-api-key: API_KEY' \
57+
-F file=@metabase_data_modified.tgz \
58+
'http://localhost:3000/api/ee/serialization/import' \
59+
-o -
60+
```

clone.rb

+166
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
# dependencies
require 'yaml'
require 'nanoid'
require 'slop'
require 'logger'

# One log file per run. strftime avoids the spaces and colons that
# Time#to_s would embed (colons are invalid in filenames on Windows);
# the name still matches the log-*.txt pattern in .gitignore.
@log = Logger.new("log-#{Time.now.strftime('%Y-%m-%d_%H-%M-%S')}.txt")

# Command-line options (see README for usage examples).
@opts = Slop.parse do |o|
  o.string '-y', '--yaml-files', 'the directory containing the yaml files', required: true
  o.string '-s', '--source-datasource', 'the datasource to be replaced', required: true
  o.string '-t', '--target-datasource', 'the datasource to replace with', required: true
  o.string '-n', '--new-collection-name', 'the new name for the top collection', default: nil
  o.bool '-d', '--duplicate', 'duplicate the collections', default: false
  o.on '--help' do
    puts o
    exit
  end
end
20+
21+
22+
# arguments
23+
#@source_dir = File.absolute_path(ARGV[0]) # original files
24+
#@source_datasource = ARGV[1] # original datasource
25+
#@target_datasource = ARGV[2] # modified datasource
26+
#@new_top_collection_name = ARGV[3]
27+
#@duplicate = ARGV[4]=="--duplicate"
28+
29+
@num_collections = 0
30+
@num_dash = 0
31+
@num_questions = 0
32+
@num_models = 0
33+
34+
########
35+
# TODO: Go through each card file in the source directory
36+
37+
# Example directory: site-2024-07-30_17-44/collections/_s2ntQI8-FhJGatZY3nEn_automatically_generated_dashboards/**/cards/*.yaml
38+
@entity_id_map = {}
39+
def entity_id_for(entity_id, remap=true)
40+
raise "Attempting to map for empty entity_id" if entity_id.nil? || entity_id.empty?
41+
if @entity_id_map.has_key?(entity_id)
42+
elsif remap
43+
@entity_id_map[entity_id] = Nanoid.generate
44+
@log.debug "Generated new entity ID: #{entity_id} -> #{@entity_id_map[entity_id]}"
45+
else
46+
# do not map to new entity id
47+
@entity_id_map[entity_id] = entity_id
48+
end
49+
return @entity_id_map[entity_id]
50+
end
51+
52+
# Rewrites the entity ids inside +entity+ (a deserialized YAML hash) using
# entity_id_for, so a re-imported copy does not collide with the original.
#
# type:          one of "question", "model", "dashboard", "collection";
#                controls which fields are remapped.
# update_parent: for collections only — when false, parent_id is left
#                untouched (used for the top-level collection).
def update_entity_ids(entity, type=nil, update_parent=true)
  if type=="question" || type=="model" || type=="dashboard"
    entity["entity_id"] = entity_id_for(entity["entity_id"]) if entity.has_key?("entity_id")
    entity["serdes/meta"][0]["id"] = entity_id_for(entity["serdes/meta"][0]["id"]) if entity["serdes/meta"].is_a?(Array) && entity["serdes/meta"][0].has_key?("id")
    entity["collection_id"] = entity_id_for(entity["collection_id"]) if entity.has_key?("collection_id")
  end
  if type=="question" || type=="model"
    # Cards can reference other cards as their source via a string
    # "source-table" (integer ids point at real tables and are left alone).
    traverse(entity) do |node|
      if node.is_a?(Hash) && node.has_key?("source-table") && node["source-table"].is_a?(String)
        node["source-table"] = entity_id_for(node["source-table"])
      end
    end
  end
  if type=="dashboard"
    # Update entity ids of dashcards
    entity.has_key?("dashcards") && entity["dashcards"].each do |dashcard|
      dashcard["entity_id"] = entity_id_for(dashcard["entity_id"]) if dashcard.has_key?("entity_id")
      dashcard["card_id"] = entity_id_for(dashcard["card_id"]) if dashcard.has_key?("card_id") && !dashcard["card_id"].nil?
      dashcard.has_key?("parameter_mappings") && dashcard["parameter_mappings"].each do |param_map|
        param_map["card_id"] = entity_id_for(param_map["card_id"]) if param_map.has_key?("card_id")
        # Parameter targets embed [datasource, ...] tuples; swap the datasource.
        traverse(param_map) do |node|
          if node.is_a?(Array) && node[0] == @opts["source-datasource"]
            node[0] = @opts["target-datasource"]
          end
        end
      end
      # Guard against dashcards without visualization_settings — the
      # original unconditional .has_key? raised NoMethodError on nil.
      viz = dashcard["visualization_settings"]
      if viz.is_a?(Hash) && viz.has_key?("click_behavior") && viz["click_behavior"].has_key?("targetId")
        viz["click_behavior"]["targetId"] = entity_id_for(viz["click_behavior"]["targetId"])
      end
    end
  elsif type=="collection"
    entity["entity_id"] = entity_id_for(entity["entity_id"]) if entity.has_key?("entity_id")
    entity["serdes/meta"][0]["id"] = entity_id_for(entity["serdes/meta"][0]["id"]) if entity["serdes/meta"].is_a?(Array) && entity["serdes/meta"][0].has_key?("id")
    entity["parent_id"] = entity_id_for(entity["parent_id"]) if update_parent && !entity["parent_id"].nil?
  end
end
88+
89+
def update_datasource(entity)
90+
if entity.has_key?("database_id") && entity["database_id"] == @opts["source-datasource"]
91+
entity["database_id"] = @opts["target-datasource"]
92+
end
93+
if entity.has_key?("dataset_query") && entity["dataset_query"].has_key?("database") && entity["dataset_query"]["database"] == @opts["source-datasource"]
94+
entity["dataset_query"]["database"] = @opts["target-datasource"]
95+
end
96+
for key in ["table_id", "dataset_query"] do
97+
if entity.has_key?(key) && entity[key].is_a?(Array) && entity[key][0] == @opts["source-datasource"]
98+
entity[key][0] = @opts["target-datasource"]
99+
end
100+
end
101+
traverse(entity) do |node|
102+
if node.is_a?(Array) && node[0] == @opts["source-datasource"]
103+
node[0] = @opts["target-datasource"]
104+
end
105+
end
106+
end
107+
108+
# Depth-first walk over a nested Hash/Array structure. Yields every node
# to the block: hashes and arrays themselves, each hash key, and every
# leaf value. +parent+ is the nearest enclosing hash key (nil at the top).
def traverse(obj, parent=nil, &blk)
  if obj.is_a?(Hash)
    blk.call(obj, parent)
    obj.each do |key, value|
      blk.call(key, parent)
      # the hash key becomes the parent for everything beneath it
      traverse(value, key, &blk)
    end
  elsif obj.is_a?(Array)
    blk.call(obj, parent)
    obj.each { |item| traverse(item, parent, &blk) }
  else
    blk.call(obj, parent)
  end
end
124+
125+
## Update top parent collection
Dir.glob("#{@opts["yaml-files"]}/collections/*/*.yaml") do |yaml_file|
  @log.debug "Processing top level collection: #{yaml_file}"
  entity = YAML.load_file(yaml_file)
  update_entity_ids(entity, "collection", false) if @opts[:duplicate]

  # Update name and slug — but only when -n was supplied. The option
  # defaults to nil and the original code crashed on nil.downcase
  # whenever --new-collection-name was omitted.
  if @opts["new-collection-name"]
    new_name = @opts["new-collection-name"]
    slug = new_name.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
    entity["name"] = new_name
    entity["slug"] = slug
    entity["serdes/meta"][0]["label"] = slug
  end
  @num_collections+=1
  File.write(yaml_file, entity.to_yaml)
  @log.debug "Wrote #{yaml_file}"
end

## Update everything else
Dir.glob("#{@opts["yaml-files"]}/collections/*/*/**/*.yaml") do |yaml_file|
  @log.debug "Processing #{yaml_file}"
  entity = YAML.load_file(yaml_file)

  # dig avoids a NoMethodError for entities without a serdes/meta array.
  model = entity.dig("serdes/meta", 0, "model")
  if entity["type"]=="question"
    update_entity_ids(entity, "question") if @opts[:duplicate]
    update_datasource(entity)
    @num_questions+=1
  elsif entity["type"]=="model"
    update_entity_ids(entity, "model") if @opts[:duplicate]
    update_datasource(entity)
    @num_models+=1
  elsif model=="Dashboard"
    update_entity_ids(entity, "dashboard") if @opts[:duplicate]
    @num_dash+=1
  elsif model=="Collection"
    update_entity_ids(entity, "collection") if @opts[:duplicate]
    @num_collections+=1
  end
  File.write(yaml_file, entity.to_yaml)
  @log.debug "Wrote #{yaml_file}"
end

# Summary now includes models — @num_models was counted but never reported.
@log.debug "Processed #{@num_collections} collections, #{@num_dash} dashboards, #{@num_questions} questions, #{@num_models} models."

@log.debug @entity_id_map.to_yaml

0 commit comments

Comments
 (0)