@@ -14,16 +14,16 @@ The chapter uses the following packages:
1414library(sf )
1515library(terra )
1616library(dplyr )
17- library(data.table ) # fast data frame manipulation (used by mlr3)
18- library(mlr3 ) # machine learning (see Chapter 12)
19- library(mlr3spatiotempcv ) # spatiotemporal resampling
20- library(mlr3tuning ) # hyperparameter tuning package
21- library(mlr3learners ) # interface to most important machine learning pkgs
22- library(paradox ) # defining hyperparameter spaces
23- library(ranger ) # random forest package
24- library(qgisprocess ) # bridge to QGIS (Chapter 10)
25- library(tree ) # decision tree package
26- library(vegan ) # community ecology package
17+ library(data.table ) # fast data frame manipulation (used by mlr3)
18+ library(mlr3 ) # machine learning (see Chapter 12)
19+ library(mlr3spatiotempcv ) # spatiotemporal resampling
20+ library(mlr3tuning ) # hyperparameter tuning package
21+ library(mlr3learners ) # interface to most important machine learning pkgs
22+ library(paradox ) # defining hyperparameter spaces
23+ library(ranger ) # random forest package
24+ library(qgisprocess ) # bridge to QGIS (Chapter 10)
25+ library(tree ) # decision tree package
26+ library(vegan ) # community ecology package
2727```
2828
2929## Introduction
@@ -66,8 +66,8 @@ All the data needed for the subsequent analyses is available via the **spDataLar
6666
6767``` r
6868data(" study_area" , " random_points" , " comm" , package = " spDataLarge" )
69- dem <- rast(system.file(" raster/dem.tif" , package = " spDataLarge" ))
70- ndvi <- rast(system.file(" raster/ndvi.tif" , package = " spDataLarge" ))
69+ dem = rast(system.file(" raster/dem.tif" , package = " spDataLarge" ))
70+ ndvi = rast(system.file(" raster/ndvi.tif" , package = " spDataLarge" ))
7171```
7272
7373` study_area ` is a polygon representing the outline of the study area, and ` random_points ` is an ` sf ` object containing the 100 randomly chosen sites.
@@ -155,14 +155,13 @@ The resulting rasters\index{raster} are saved to temporary files with an `.sdat`
155155
156156``` r
157157# environmental predictors: catchment slope and catchment area
158- ep <- qgisprocess :: qgis_run_algorithm(
158+ ep = qgisprocess :: qgis_run_algorithm(
159159 alg = " sagang:sagawetnessindex" ,
160160 DEM = dem ,
161161 SLOPE_TYPE = 1 ,
162162 SLOPE = tempfile(fileext = " .sdat" ),
163163 AREA = tempfile(fileext = " .sdat" ),
164- .quiet = TRUE
165- )
164+ .quiet = TRUE )
166165```
167166
168167This returns a list named ` ep ` containing the paths to the computed output rasters.
@@ -172,35 +171,35 @@ Additionally, we will add two more raster objects to it, namely `dem` and `ndvi`
172171
173172``` r
174173# read in catchment area and catchment slope
175- ep <- ep [c(" AREA" , " SLOPE" )] | >
174+ ep = ep [c(" AREA" , " SLOPE" )] | >
176175 unlist() | >
177176 rast()
178- names(ep ) <- c(" carea" , " cslope" ) # assign better names
179- origin(ep ) <- origin(dem ) # make sure rasters have the same origin
180- ep <- c(dem , ndvi , ep ) # add dem and ndvi to the multi-layer SpatRaster object
177+ names(ep ) = c(" carea" , " cslope" ) # assign better names
178+ origin(ep ) = origin(dem ) # make sure rasters have the same origin
179+ ep = c(dem , ndvi , ep ) # add dem and ndvi to the multi-layer SpatRaster object
181180```
182181
183182Additionally, the catchment area\index{catchment area} values are highly skewed to the right (` hist(ep$carea) ` ).
184183A log10-transformation makes the distribution more normal.
185184
186185
187186``` r
188- ep $ carea <- log10(ep $ carea )
187+ ep $ carea = log10(ep $ carea )
189188```
190189
191190As a convenience to the reader, we have added ` ep ` to ** spDataLarge** :
192191
193192
194193``` r
195- ep <- rast(system.file(" raster/ep.tif" , package = " spDataLarge" ))
194+ ep = rast(system.file(" raster/ep.tif" , package = " spDataLarge" ))
196195```
197196
198197Finally, we can extract the terrain attributes to our field observations (see also Section \@ ref(raster-extraction)).
199198
200199
201200``` r
202- ep_rp <- terra :: extract(ep , random_points , ID = FALSE )
203- random_points <- cbind(random_points , ep_rp )
201+ ep_rp = terra :: extract(ep , random_points , ID = FALSE )
202+ random_points = cbind(random_points , ep_rp )
204203```
205204
206205## Reducing dimensionality {#nmds}
@@ -235,9 +234,9 @@ Hence, we need to dismiss all sites in which no species were found.
235234
236235``` r
237236# presence-absence matrix
238- pa <- vegan :: decostand(comm , " pa" ) # 100 rows (sites), 69 columns (species)
237+ pa = vegan :: decostand(comm , " pa" ) # 100 rows (sites), 69 columns (species)
239238# keep only sites in which at least one species was found
240- pa <- pa [rowSums(pa ) != 0 , ] # 84 rows, 69 columns
239+ pa = pa [rowSums(pa ) != 0 , ] # 84 rows, 69 columns
241240```
242241
243242The resulting matrix serves as input for the NMDS\index{NMDS}.
@@ -250,7 +249,7 @@ To make sure that the algorithm converges, we set the number of steps to 500 usi
250249
251250``` r
252251set.seed(25072018 )
253- nmds <- vegan :: metaMDS(comm = pa , k = 4 , try = 500 )
252+ nmds = vegan :: metaMDS(comm = pa , k = 4 , try = 500 )
254253nmds $ stress
255254# > ...
256255# > Run 498 stress 0.08834745
@@ -276,17 +275,15 @@ Plotting the result reveals that the first axis is, as intended, clearly associa
276275
277276
278277``` r
279- elev <- dplyr :: filter(random_points , id %in% rownames(pa )) | >
278+ elev = dplyr :: filter(random_points , id %in% rownames(pa )) | >
280279 dplyr :: pull(dem )
281280# rotating NMDS in accordance with altitude (proxy for humidity)
282- rotnmds <- vegan :: MDSrotate(nmds , elev )
281+ rotnmds = vegan :: MDSrotate(nmds , elev )
283282# extracting the first two axes
284- sc <- vegan :: scores(rotnmds , choices = 1 : 2 , display = " sites" )
283+ sc = vegan :: scores(rotnmds , choices = 1 : 2 , display = " sites" )
285284# plotting the first axis against altitude
286- plot(
287- y = sc [, 1 ], x = elev , xlab = " elevation in m" ,
288- ylab = " First NMDS axis" , cex.lab = 0.8 , cex.axis = 0.8
289- )
285+ plot(y = sc [, 1 ], x = elev , xlab = " elevation in m" ,
286+ ylab = " First NMDS axis" , cex.lab = 0.8 , cex.axis = 0.8 )
290287```
291288
292289<div class =" figure " style =" text-align : center " >
@@ -313,17 +310,17 @@ We will also use the resulting data frame for the **mlr3**\index{mlr3 (package)}
313310``` r
314311# construct response-predictor matrix
315312# id- and response variable
316- rp <- data.frame (id = as.numeric(rownames(sc )), sc = sc [, 1 ])
313+ rp = data.frame (id = as.numeric(rownames(sc )), sc = sc [, 1 ])
317314# join the predictors (dem, ndvi and terrain attributes)
318- rp <- inner_join(random_points , rp , by = " id" )
315+ rp = inner_join(random_points , rp , by = " id" )
319316```
320317
321318Decision trees split the predictor space into a number of regions.
322319To illustrate this, we apply a decision tree to our data using the scores of the first NMDS\index{NMDS} axis as the response (` sc ` ) and altitude (` dem ` ) as the only predictor.
323320
324321
325322``` r
326- tree_mo <- tree :: tree(sc ~ dem , data = rp )
323+ tree_mo = tree :: tree(sc ~ dem , data = rp )
327324plot(tree_mo )
328325text(tree_mo , pretty = 0 )
329326```
@@ -382,7 +379,7 @@ For specifying a spatial task, we use again the **mlr3spatiotempcv** package [@s
382379
383380``` r
384381# create task
385- task <- mlr3spatiotempcv :: as_task_regr_st(
382+ task = mlr3spatiotempcv :: as_task_regr_st(
386383 select(rp , - id , - spri ),
387384 target = " sc" ,
388385 id = " mongon"
@@ -395,7 +392,7 @@ Next, we go on to construct a random forest\index{random forest} learner from th
395392
396393
397394``` r
398- lrn_rf <- lrn(" regr.ranger" , predict_type = " response" )
395+ lrn_rf = lrn(" regr.ranger" , predict_type = " response" )
399396```
400397
401398As opposed to, for example, Support Vector Machines\index{SVM} (see Section \@ ref(svm)), random forests often already show good performances when used with the default values of their hyperparameters (which may be one reason for their popularity).
@@ -414,7 +411,7 @@ Hyperparameter\index{hyperparameter} combinations will be selected randomly but
414411
415412``` r
416413# specifying the search space
417- search_space <- paradox :: ps(
414+ search_space = paradox :: ps(
418415 mtry = paradox :: p_int(lower = 1 , upper = ncol(task $ data()) - 1 ),
419416 sample.fraction = paradox :: p_dbl(lower = 0.2 , upper = 0.9 ),
420417 min.node.size = paradox :: p_int(lower = 1 , upper = 10 )
@@ -429,7 +426,7 @@ The performance measure is the root mean squared error (RMSE\index{RMSE}).
429426
430427
431428``` r
432- autotuner_rf <- mlr3tuning :: auto_tuner(
429+ autotuner_rf = mlr3tuning :: auto_tuner(
433430 learner = lrn_rf ,
434431 resampling = mlr3 :: rsmp(" spcv_coords" , folds = 5 ), # spatial partitioning
435432 measure = mlr3 :: msr(" regr.rmse" ), # performance measure
@@ -487,10 +484,10 @@ As an alternative, you can also use the dedicated **mlr3spatial** package for do
487484
488485
489486``` r
490- pred <- terra :: predict(ep , model = autotuner_rf $ learner $ model $ model , fun = predict )
487+ pred = terra :: predict(ep , model = autotuner_rf $ learner $ model $ model , fun = predict )
491488
492489# doing the same using mlr3spatial
493- # pred <- mlr3spatial::predict_spatial(newdata = ep, learner = autotuner_rf)
490+ # pred = mlr3spatial::predict_spatial(newdata = ep, learner = autotuner_rf)
494491```
495492
496493<div class =" figure " style =" text-align : center " >
@@ -502,15 +499,15 @@ In case, `terra::predict()` does not support a model algorithm, you can still ma
502499
503500
504501``` r
505- newdata <- as.data.frame(as.matrix(ep ))
506- colSums(is.na(newdata )) # 0 NAs
502+ newdata = as.data.frame(as.matrix(ep ))
503+ colSums(is.na(newdata )) # 0 NAs
507504# but assuming there were 0s results in a more generic approach
508- ind <- rowSums(is.na(newdata )) == 0
509- tmp <- autotuner_rf $ predict_newdata(newdata = newdata [ind , ], task = task )
510- newdata [ind , " pred" ] <- data.table :: as.data.table(tmp )[[" response" ]]
511- pred_2 <- ep $ dem
505+ ind = rowSums(is.na(newdata )) == 0
506+ tmp = autotuner_rf $ predict_newdata(newdata = newdata [ind , ], task = task )
507+ newdata [ind , " pred" ] = data.table :: as.data.table(tmp )[[" response" ]]
508+ pred_2 = ep $ dem
512509# now fill the raster with the predicted values
513- pred_2 [] <- newdata $ pred
510+ pred_2 [] = newdata $ pred
514511# check if terra and our manual prediction is the same
515512all(values(pred - pred_2 ) == 0 )
516513```
0 commit comments