Skip to content

Commit bdfecfb

Browse files
authored
feat: incremental refresh for extracts (#1545)
* implement incremental refresh * add sample that creates an incremental extract/runs 'refresh now'
1 parent e3f1e22 commit bdfecfb

File tree

7 files changed

+90
-31
lines changed

7 files changed

+90
-31
lines changed

Diff for: samples/create_extract_task.py

+12-7
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@ def main():
2929
help="desired logging level (set to error by default)",
3030
)
3131
# Options specific to this sample:
32-
# This sample has no additional options, yet. If you add some, please add them here
32+
parser.add_argument("resource_type", choices=["workbook", "datasource"])
33+
parser.add_argument("resource_id")
34+
parser.add_argument("--incremental", default=False)
3335

3436
args = parser.parse_args()
3537

@@ -45,6 +47,7 @@ def main():
4547
# Monthly Schedule
4648
# This schedule will run on the 15th of every month at 11:30PM
4749
monthly_interval = TSC.MonthlyInterval(start_time=time(23, 30), interval_value=15)
50+
print(monthly_interval)
4851
monthly_schedule = TSC.ScheduleItem(
4952
None,
5053
None,
@@ -53,18 +56,20 @@ def main():
5356
monthly_interval,
5457
)
5558

56-
# Default to using first workbook found in server
57-
all_workbook_items, pagination_item = server.workbooks.get()
58-
my_workbook: TSC.WorkbookItem = all_workbook_items[0]
59+
my_workbook: TSC.WorkbookItem = server.workbooks.get_by_id(args.resource_id)
5960

6061
target_item = TSC.Target(
6162
my_workbook.id, # the id of the workbook or datasource
6263
"workbook", # alternatively can be "datasource"
6364
)
6465

65-
extract_item = TSC.TaskItem(
66+
refresh_type = "FullRefresh"
67+
if args.incremental:
68+
refresh_type = "Incremental"
69+
70+
scheduled_extract_item = TSC.TaskItem(
6671
None,
67-
"FullRefresh",
72+
refresh_type,
6873
None,
6974
None,
7075
None,
@@ -74,7 +79,7 @@ def main():
7479
)
7580

7681
try:
77-
response = server.tasks.create(extract_item)
82+
response = server.tasks.create(scheduled_extract_item)
7883
print(response)
7984
except Exception as e:
8085
print(e)

Diff for: samples/extracts.py

+37-9
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,11 @@ def main():
2525
help="desired logging level (set to error by default)",
2626
)
2727
# Options specific to this sample
28-
parser.add_argument("--delete")
29-
parser.add_argument("--create")
28+
parser.add_argument("--create", action="store_true")
29+
parser.add_argument("--delete", action="store_true")
30+
parser.add_argument("--refresh", action="store_true")
31+
parser.add_argument("--workbook", required=False)
32+
parser.add_argument("--datasource", required=False)
3033
args = parser.parse_args()
3134

3235
# Set logging level based on user input, or error by default
@@ -39,20 +42,45 @@ def main():
3942
server.add_http_options({"verify": False})
4043
server.use_server_version()
4144
with server.auth.sign_in(tableau_auth):
42-
# Gets all workbook items
43-
all_workbooks, pagination_item = server.workbooks.get()
44-
print(f"\nThere are {pagination_item.total_available} workbooks on site: ")
45-
print([workbook.name for workbook in all_workbooks])
4645

47-
if all_workbooks:
48-
# Pick one workbook from the list
49-
wb = all_workbooks[3]
46+
wb = None
47+
ds = None
48+
if args.workbook:
49+
wb = server.workbooks.get_by_id(args.workbook)
50+
if wb is None:
51+
raise ValueError(f"Workbook not found for id {args.workbook}")
52+
elif args.datasource:
53+
ds = server.datasources.get_by_id(args.datasource)
54+
if ds is None:
55+
raise ValueError(f"Datasource not found for id {args.datasource}")
56+
else:
57+
# Gets all workbook items
58+
all_workbooks, pagination_item = server.workbooks.get()
59+
print(f"\nThere are {pagination_item.total_available} workbooks on site: ")
60+
print([workbook.name for workbook in all_workbooks])
61+
62+
if all_workbooks:
63+
# Pick one workbook from the list
64+
wb = all_workbooks[3]
5065

5166
if args.create:
5267
print("create extract on wb ", wb.name)
5368
extract_job = server.workbooks.create_extract(wb, includeAll=True)
5469
print(extract_job)
5570

71+
if args.refresh:
72+
extract_job = None
73+
if ds is not None:
74+
print(f"refresh extract on datasource {ds.name}")
75+
extract_job = server.datasources.refresh(ds, includeAll=True, incremental=True)
76+
elif wb is not None:
77+
print(f"refresh extract on workbook {wb.name}")
78+
extract_job = server.workbooks.refresh(wb)
79+
else:
80+
print("no content item selected to refresh")
81+
82+
print(extract_job)
83+
5684
if args.delete:
5785
print("delete extract on wb ", wb.name)
5886
jj = server.workbooks.delete_extract(wb)

Diff for: samples/publish_workbook.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def main():
5555

5656
# Step 1: Sign in to server.
5757
tableau_auth = TSC.PersonalAccessTokenAuth(args.token_name, args.token_value, site_id=args.site)
58-
server = TSC.Server(args.server, use_server_version=True)
58+
server = TSC.Server(args.server, use_server_version=True, http_options={"verify": False})
5959
with server.auth.sign_in(tableau_auth):
6060
# Step 2: Retrieve the project id, if a project name was passed
6161
if args.project is not None:

Diff for: samples/refresh.py

+25-8
Original file line numberDiff line numberDiff line change
@@ -27,34 +27,51 @@ def main():
2727
# Options specific to this sample
2828
parser.add_argument("resource_type", choices=["workbook", "datasource"])
2929
parser.add_argument("resource_id")
30+
parser.add_argument("--incremental")
31+
parser.add_argument("--synchronous")
3032

3133
args = parser.parse_args()
3234

3335
# Set logging level based on user input, or error by default
3436
logging_level = getattr(logging, args.logging_level.upper())
3537
logging.basicConfig(level=logging_level)
3638

39+
refresh_type = "FullRefresh"
40+
incremental = False
41+
if args.incremental:
42+
refresh_type = "Incremental"
43+
incremental = True
44+
3745
tableau_auth = TSC.PersonalAccessTokenAuth(args.token_name, args.token_value, site_id=args.site)
38-
server = TSC.Server(args.server, use_server_version=True)
46+
server = TSC.Server(args.server, use_server_version=True, http_options={"verify": False})
3947
with server.auth.sign_in(tableau_auth):
4048
if args.resource_type == "workbook":
4149
# Get the workbook by its Id to make sure it exists
4250
resource = server.workbooks.get_by_id(args.resource_id)
51+
print(resource)
4352

4453
# trigger the refresh, you'll get a job id back which can be used to poll for when the refresh is done
45-
job = server.workbooks.refresh(args.resource_id)
54+
job = server.workbooks.refresh(args.resource_id, incremental=incremental)
4655
else:
4756
# Get the datasource by its Id to make sure it exists
4857
resource = server.datasources.get_by_id(args.resource_id)
58+
print(resource)
59+
60+
# server.datasources.create_extract(resource)
4961

5062
# trigger the refresh, you'll get a job id back which can be used to poll for when the refresh is done
51-
job = server.datasources.refresh(resource)
63+
job = server.datasources.refresh(resource, incremental=incremental) # by default runs as a sync task,
5264

53-
print(f"Update job posted (ID: {job.id})")
54-
print("Waiting for job...")
55-
# `wait_for_job` will throw if the job isn't executed successfully
56-
job = server.jobs.wait_for_job(job)
57-
print("Job finished succesfully")
65+
print(f"{refresh_type} job posted (ID: {job.id})")
66+
if args.synchronous:
67+
# equivalent to tabcmd --synchronous: wait for the job to complete
68+
try:
69+
# `wait_for_job` will throw if the job isn't executed successfully
70+
print("Waiting for job...")
71+
server.jobs.wait_for_job(job)
72+
print("Job finished succesfully")
73+
except Exception as e:
74+
print(f"Job failed! {e}")
5875

5976

6077
if __name__ == "__main__":

Diff for: tableauserverclient/server/endpoint/datasources_endpoint.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -187,11 +187,11 @@ def update_connection(
187187
return connection
188188

189189
@api(version="2.8")
190-
def refresh(self, datasource_item: DatasourceItem) -> JobItem:
190+
def refresh(self, datasource_item: DatasourceItem, incremental: bool = False) -> JobItem:
191191
id_ = getattr(datasource_item, "id", datasource_item)
192192
url = f"{self.baseurl}/{id_}/refresh"
193-
empty_req = RequestFactory.Empty.empty_req()
194-
server_response = self.post_request(url, empty_req)
193+
refresh_req = RequestFactory.Task.refresh_req(incremental)
194+
server_response = self.post_request(url, refresh_req)
195195
new_job = JobItem.from_response(server_response.content, self.parent_srv.namespace)[0]
196196
return new_job
197197

Diff for: tableauserverclient/server/endpoint/workbooks_endpoint.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -118,14 +118,16 @@ def get_by_id(self, workbook_id: str) -> WorkbookItem:
118118
return WorkbookItem.from_response(server_response.content, self.parent_srv.namespace)[0]
119119

120120
@api(version="2.8")
121-
def refresh(self, workbook_item: Union[WorkbookItem, str]) -> JobItem:
121+
def refresh(self, workbook_item: Union[WorkbookItem, str], incremental: bool = False) -> JobItem:
122122
"""
123123
Refreshes the extract of an existing workbook.
124124
125125
Parameters
126126
----------
127127
workbook_item : WorkbookItem | str
128128
The workbook item or workbook ID.
129+
incremental: bool
130+
If True, run an incremental refresh of the extract data; if False (the default), run a full refresh.
129131
130132
Returns
131133
-------
@@ -134,8 +136,8 @@ def refresh(self, workbook_item: Union[WorkbookItem, str]) -> JobItem:
134136
"""
135137
id_ = getattr(workbook_item, "id", workbook_item)
136138
url = f"{self.baseurl}/{id_}/refresh"
137-
empty_req = RequestFactory.Empty.empty_req()
138-
server_response = self.post_request(url, empty_req)
139+
refresh_req = RequestFactory.Task.refresh_req(incremental)
140+
server_response = self.post_request(url, refresh_req)
139141
new_job = JobItem.from_response(server_response.content, self.parent_srv.namespace)[0]
140142
return new_job
141143

Diff for: tableauserverclient/server/request_factory.py

+7
Original file line numberDiff line numberDiff line change
@@ -1117,6 +1117,13 @@ def run_req(self, xml_request: ET.Element, task_item: Any) -> None:
11171117
# Send an empty tsRequest
11181118
pass
11191119

1120+
@_tsrequest_wrapped
1121+
def refresh_req(self, xml_request: ET.Element, incremental: bool = False) -> bytes:
1122+
task_element = ET.SubElement(xml_request, "extractRefresh")
1123+
if incremental:
1124+
task_element.attrib["incremental"] = "true"
1125+
return ET.tostring(xml_request)
1126+
11201127
@_tsrequest_wrapped
11211128
def create_extract_req(self, xml_request: ET.Element, extract_item: "TaskItem") -> bytes:
11221129
extract_element = ET.SubElement(xml_request, "extractRefresh")

0 commit comments

Comments
 (0)