Skip to content

Commit 7be1f1b

Browse files
committed
add estimate_centrality_time fn; closes #90
1 parent 280bbea commit 7be1f1b

File tree

8 files changed

+160
-5
lines changed

8 files changed

+160
-5
lines changed

Diff for: DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: dodgr
22
Title: Distances on Directed Graphs
3-
Version: 0.2.5.012
3+
Version: 0.2.5.013
44
Authors@R: c(
55
person("Mark", "Padgham", email="[email protected]", role=c("aut", "cre")),
66
person("Andreas", "Petutschnig", role="aut"),

Diff for: NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ export(dodgr_to_tidygraph)
3434
export(dodgr_uncontract_graph)
3535
export(dodgr_vertices)
3636
export(estimate_centrality_threshold)
37+
export(estimate_centrality_time)
3738
export(igraph_to_dodgr)
3839
export(match_points_to_graph)
3940
export(match_pts_to_graph)

Diff for: R/centrality.R

+88
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ dodgr_centrality <- function (graph, contract = TRUE, edges = TRUE,
123123
else
124124
dirtxt <- get_random_prefix ("centrality_vert")
125125

126+
# final '0' is for sampling calculation to estimate speed - non-zero values
127+
# used only in 'estimate_centrality_time'
126128
rcpp_centrality (graph2, vert_map, heap, dirtxt, dist_threshold, edges, 0)
127129

128130
# aggregate results from the threads:
@@ -235,3 +237,89 @@ estimate_centrality_threshold <- function (graph, tolerance = 0.001)
235237
return (d)
236238
# nocov end
237239
}
240+
241+
#' estimate_centrality_time
242+
#'
243+
#' The 'dodgr' centrality functions are designed to be applied to potentially
244+
#' very large graphs, and may take considerable time to execute. This helper
245+
#' function estimates how long a centrality function may take for a given graph
246+
#' and given value of 'dist_threshold' estimated via the
247+
#' \link{estimate_centrality_threshold} function.
248+
#'
249+
#' @inheritParams dodgr_centrality
250+
#' @return An estimated calculation time for calculating centrality for the
251+
#' given value of 'dist_threshold'
252+
#'
253+
#' @note This function may take some time to execute. While running, it displays
254+
#' ongoing information on screen of estimated values of 'dist_threshold' and
255+
#' associated errors. Thresholds are progressively increased until the error is
256+
#' reduced below the specified tolerance.
257+
#'
258+
#' @export
259+
estimate_centrality_time <- function (graph, contract = TRUE, edges = TRUE,
260+
dist_threshold = NULL, heap = "BHeap")
261+
{
262+
# copies all code from dodgr_centrality, but uses the otherwise non-exposed
263+
# 'sample' parameter passed through to C++ routines
264+
if (is.null (dist_threshold))
265+
dist_threshold <- .Machine$double.xmax
266+
267+
hps <- get_heap (heap, graph)
268+
heap <- hps$heap
269+
graph <- hps$graph
270+
271+
gr_cols <- dodgr_graph_cols (graph)
272+
273+
if (contract & methods::is (graph, "dodgr_contracted"))
274+
contract <- FALSE
275+
if (contract & !methods::is (graph, "dodgr_contracted"))
276+
{
277+
graph_full <- graph
278+
graph <- dodgr_contract_graph (graph)
279+
hashc <- get_hash (graph, hash = FALSE)
280+
fname_c <- file.path (tempdir (),
281+
paste0 ("dodgr_edge_map_", hashc, ".Rds"))
282+
if (!file.exists (fname_c))
283+
stop ("something went wrong extracting the edge_map ... ") # nocov
284+
edge_map <- readRDS (fname_c)
285+
}
286+
287+
vert_map <- make_vert_map (graph, gr_cols)
288+
289+
graph2 <- convert_graph (graph, gr_cols)
290+
291+
# centrality calculation, done in parallel with each thread dumping results
292+
# to files in tempdir()
293+
if (edges)
294+
dirtxt <- get_random_prefix ("centrality_edge")
295+
else
296+
dirtxt <- get_random_prefix ("centrality_vert")
297+
298+
# final '0' is for sampling calculation to estimate speed - non-zero values
299+
# used only in 'estimate_centrality_time'
300+
st <- system.time (
301+
rcpp_centrality (graph2, vert_map, heap, dirtxt,
302+
dist_threshold, edges, 100)
303+
) [3]
304+
305+
# remove files otherwise used to aggregate results from the threads:
306+
f <- list.files (tempdir (), full.names = TRUE)
307+
files <- f [grep (dirtxt, f)]
308+
junk <- file.remove (files) # nolint
309+
310+
# convert to estimated time for full graph
311+
st <- st * nrow (graph) / 100
312+
313+
hh <- floor (st / 3600)
314+
st <- st - 3600 * hh
315+
mm <- floor (st / 60)
316+
ss <- round (st - 60 * mm)
317+
318+
hh <- sprintf ("%02d", hh)
319+
mm <- sprintf ("%02d", mm)
320+
ss <- sprintf ("%02d", ss)
321+
res <- paste0 (hh, ":", mm, ":", ss)
322+
message ("Estimated time to calculate centrality for full graph is ",
323+
res)
324+
invisible (res)
325+
}

Diff for: _pkgdown.yml

+1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ reference:
4040
contents:
4141
- dodgr_centrality
4242
- esimtate_centrality_threshold
43+
- esimtate_centrality_time
4344
- title: Miscellaneous Functions
4445
contents:
4546
- compare_heaps

Diff for: codemeta.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
"codeRepository": "https://github.com/ATFutures/dodgr",
1111
"issueTracker": "https://github.com/ATFutures/dodgr/issues",
1212
"license": "https://spdx.org/licenses/GPL-3.0",
13-
"version": "0.2.5.12",
13+
"version": "0.2.5.13",
1414
"programmingLanguage": {
1515
"@type": "ComputerLanguage",
1616
"name": "R",
@@ -392,5 +392,5 @@
392392
}
393393
],
394394
"relatedLink": "https://CRAN.R-project.org/package=dodgr",
395-
"fileSize": "8610.897KB"
395+
"fileSize": "8611.651KB"
396396
}

Diff for: man/estimate_centrality_time.Rd

+54
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: src/centrality.cpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -380,22 +380,26 @@ void rcpp_centrality (const Rcpp::DataFrame graph,
380380
std::shared_ptr <DGraph> g = std::make_shared <DGraph> (nverts);
381381
inst_graph (g, nedges, vert_map, from, to, dist, wt);
382382

383+
size_t nverts_to_use = nverts;
384+
if (sample > 0)
385+
nverts_to_use = static_cast <size_t> (sample);
386+
383387
// Create parallel worker
384388
if (edge_centrality)
385389
{
386390
OneCentralityEdge one_centrality (nverts, nedges, dirtxt, heap_type,
387391
dist_threshold, g);
388392

389393
GetRNGstate (); // Initialise R random seed
390-
RcppParallel::parallelFor (0, nverts, one_centrality);
394+
RcppParallel::parallelFor (0, nverts_to_use, one_centrality);
391395
PutRNGstate ();
392396
} else // vertex centrality
393397
{
394398
OneCentralityVert one_centrality (nverts, dirtxt, heap_type,
395399
dist_threshold, g);
396400

397401
GetRNGstate (); // Initialise R random seed
398-
RcppParallel::parallelFor (0, nverts, one_centrality);
402+
RcppParallel::parallelFor (0, nverts_to_use, one_centrality);
399403
PutRNGstate ();
400404
}
401405
}

Diff for: tests/testthat/test-centrality.R

+7
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,10 @@ test_that("centrality", {
2222
expect_true ("centrality" %in% names (vc))
2323
expect_false ("centrality" %in% names (v))
2424
})
25+
26+
test_that("estimate time", {
27+
graph <- weight_streetnet (hampi)
28+
expect_message (x <- estimate_centrality_time (graph),
29+
"Estimated time to calculate centrality for full graph is")
30+
expect_identical (x, "00:00:00")
31+
})

0 commit comments

Comments
 (0)