1 Background

1.1 Objective

Objective of both scripts is to semi-automatically obtain the species/genus for trees (>15 m height) from remote sensing data. Classes are Norway spruce (Picea abies, PA), European silver fir (Abies alba, AA), Scots Pine (Pinus sylvestris, PS), European Larch (Larix decidua, LD), Douglas fir (Pseudotsugea menziesii, PM), unspecified deciduous broadleaved species (DB) and snags (WD).

This present Script 2 applies the Random Forest model set up in Script 1 to the treetop buffer areas of the entire National Park. However, for demonstration purpose of this markup only a small project area is considered.

1.2 Working steps

Tree crown outline polygons are in shp-format and grouped in tiles. Each tile contains the polygons of an area of 1x1 km. The present script loops through those tiles and
1) reads the geo-coordinates of the treetop points
2) filters the treetop points by attribute and location (eliminiating outliers)
3) calculates buffers of 1.7m radius for each treetop
4) extracts and aggregate raster values for the buffer areas
5) calculates tree crown shape statistics using the altitude-normalized ALS targets. As the handling of billions of ALS points is very time consuming, the relevant altitude-normalized points have been extracted using a specialized software package. The input data for this script are only the points that fall within the treetop areas.
6) feeds those parameters into a random forest classifier
7) exports the results as shp-data and csv.

2 General settings

2.1 Define and set control switches

sw_save_results_as_rds <- T   # TRUE: save results as RDS-file (useful to
                                # avoid repeating time consuming processes)
sw_extract_rasters <- F       # TRUE: extract() raster info 
                                # (as this is the most time consuming process, 
                                #  results will be saved in rds-files)
sw_calculate_lidar_metrics <- F # TRUE: calculates metrics from lidar targets
                                # (which is time consuming)
sw_test_run <- T              # TRUE: run test dataset 
                                # (use different directories for input/output)
sw_markup_plots <- T          # TRUE: plots will be drawn in markup 
                                # (which is time consuming)
options(stringsAsFactors = F) # FALSE: strings are not factors

2.2 Include R-packages

# general purpose function by Anish Sugathan to install not-yet installed packages and load them 
usePackage <- function(p) {
  # load a package if installed, else load after installation.
  # Args:  p: package name in quotes
  if (!is.element(p, installed.packages()[,1])) install.packages(p, dep = TRUE)
  require(p, character.only = TRUE)
}

usePackage('raster')        # read and manipiulate raster data
usePackage('rgdal')         # read and manipulate vector data (GDAL)
usePackage('sf')            # read and manipulate vector data (simple features)
usePackage('dplyr')         # efficiently manipulating datasets
usePackage('ggplot2')       # advanced visualization, ggplot (with two 'g's)
usePackage('rasterVis')     # visualize of raster data, gplot (with one 'g') 
usePackage('gridExtra')     # arrange multiple grid-based plots on a page
usePackage('randomForest')  # random forest model functions
usePackage('caret')         # optimize and validiate models
usePackage('RStoolbox')     # remote sensing data processing  
usePackage('foreign')       # read dbf-files
usePackage('rgeos')         # interface to Geometry Engine - Open Source 
usePackage('magrittr')      # provides the use of pipes %>%
usePackage('readr')         # parsing a flat file into a tibble

2.3 Set data paths for input and output

if(sw_test_run == TRUE) {
  path_input_data  <- "data_markup/data_input/"
  path_output_data <- "data_markup/data_output/"
  
} else {
  path_input_data  <- "input_data/"
  path_output_data <- "output_data/"
}

2.4 Initialize and compile lists used by the loop

# Initalize lists to store the result data from each run in the subsequent loop.
# As each tree crown outline tile has its individual number of tree crowns, 
# storage in a list is more suitable than in a dataframe
list_raster_extract_all_tiles <- list() # collects extract results from raster 
list_lidar_metrics_tree       <- list() # collects metric for single tree
list_lidar_metrics_all_tiles  <- list() # collects metric data for one tile  
list_attributes_all_tiles     <- list() # collects all parameters for one tile

# compile list of dbf-file names of the tree crown outline polygon tiles
list_tile_names <- list.files(
  path = paste0(path_input_data, "lidar/tree_crown_polygon_shp/"), 
  pattern = "\\.dbf$", full.names = TRUE)
print(list_tile_names)
## [1] "data_markup/data_input/lidar/tree_crown_polygon_shp/3444_5373_DSM_04m_br_G200_M50000000.dbf"
# compile list of folder names that hold the altitude-normalized ALS targets  
# of the treetop areas
list_lidar_targets_dir_names <- list.files(
  path = paste0(path_input_data, "lidar/points_altitude-normalized_asc/"), 
  full.names = TRUE )
print(list_lidar_targets_dir_names)
## [1] "data_markup/data_input/lidar/points_altitude-normalized_asc/3444_5373_ACge10_ACse400_HTge15_HTle70"

3 Load raster dataset

r_ratio_2015_red <- raster(paste0(path_input_data, 
                        "raster/TDOP_2015_ratio_8bit_red.tif"))
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum Deutsches Hauptdreiecksnetz in Proj4 definition
r_ratio_2015_gre <- raster(paste0(path_input_data, 
                        "raster/TDOP_2015_ratio_8bit_gre.tif"))
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum Deutsches Hauptdreiecksnetz in Proj4 definition
r_ratio_2015_blu <- raster(paste0(path_input_data, 
                        "raster/TDOP_2015_ratio_8bit_blu.tif"))
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum Deutsches Hauptdreiecksnetz in Proj4 definition
r_ratio_2015_nir <- raster(paste0(path_input_data, 
                        "raster/TDOP_2015_ratio_8bit_nir.tif")) 
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum Deutsches Hauptdreiecksnetz in Proj4 definition
r_pca_1_2015 <- raster(paste0(path_input_data, 
                        "raster/TDOP_2015_PCA_1.tif"))
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum Deutsches Hauptdreiecksnetz in Proj4 definition
r_ndvi_2014 <- raster(paste0(path_input_data, 
                        "raster/TDOP_2014_NDVI_8bit.tif"))
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum Deutsches Hauptdreiecksnetz in Proj4 definition
r_ndvi_2015 <- raster(paste0(path_input_data, 
                        "raster/TDOP_2015_NDVI_8bit.tif"))
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum Deutsches Hauptdreiecksnetz in Proj4 definition
r_deri1_nir_2015 <- raster(paste0(path_input_data, 
                        "raster/TDOP_2015_deri1_nir_qgis.tif"))
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum Deutsches Hauptdreiecksnetz in Proj4 definition
if(sw_markup_plots == TRUE) {

  maxpixels <- r_ratio_2015_red@ncols * r_ratio_2015_red@nrows
  maxpixels <- 100000 # limits pixel resolution of raster

ggp1 <- gplot(r_ratio_2015_red, maxpixels=maxpixels) + ggtitle("r_ratio_2015_red") +
  geom_tile(aes(fill=value), show.legend=FALSE) + theme_void() + coord_equal() +
  scale_fill_gradient(low='black', high='white', limits=c(0,194))

ggp2 <- gplot(r_ratio_2015_gre, maxpixels=maxpixels) + ggtitle("r_ratio_2015_gre") +
  geom_tile(aes(fill=value), show.legend=FALSE) + theme_void() + coord_equal() +
  scale_fill_gradient(low='black', high='white', limits=c(0,167))

ggp3 <- gplot(r_ratio_2015_blu, maxpixels=maxpixels) + ggtitle("r_ratio_2015_blu") +
  geom_tile(aes(fill=value), show.legend=FALSE) + theme_void() + coord_equal() +
  scale_fill_gradient(low='black', high='white', limits=c(0,200))

ggp4 <- gplot(r_ratio_2015_nir, maxpixels=maxpixels) + ggtitle("r_ratio_2015_nir") +
  geom_tile(aes(fill=value), show.legend=FALSE) + theme_void() + coord_equal() +
  scale_fill_gradient(low='black', high='white', limits=c(0,255))

ggp5 <- gplot(r_pca_1_2015, maxpixels=maxpixels) + ggtitle("r_pca_1_2015") +
  geom_tile(aes(fill=value), show.legend=FALSE) + theme_void() + coord_equal() +
  scale_fill_gradient(low='black', high='white', limits=c(110,145))

ggp6 <- gplot(r_ndvi_2014, maxpixels=maxpixels) + ggtitle("r_ndvi_2014") +
  geom_tile(aes(fill=value), show.legend=FALSE) + theme_void() + coord_equal() +
  scale_fill_gradient(low='black', high='white', limits=c(0,240))

ggp7 <- gplot(r_ndvi_2015, maxpixels=maxpixels) + ggtitle("r_ndvi_2015") +
  geom_tile(aes(fill=value), show.legend=FALSE) + theme_void() + coord_equal() +
  scale_fill_gradient(low='black', high='white', limits=c(0,220))

ggp8 <- gplot(r_deri1_nir_2015, maxpixels=maxpixels) + ggtitle("r_deri1_nir_2015") +
  geom_tile(aes(fill=value), show.legend=FALSE) + theme_void() + coord_equal() +
  scale_fill_gradient(low='black', high='white', limits=c(89.975,89.999))

 grid.arrange(ggp1, ggp2, ggp3, ggp4,ggp5, ggp6, ggp7, ggp8,ncol = 3) 
}

4 Filter treetops to be included into the classification

4.1 Set crown property parameters to filter treetops

Data analysis considers only a circular area of radius 1.7m around the crown peak. The radius is a compromise of an area big enough to include sufficient spectral information, while avoiding overlap with the crown areas of neighbouring trees.

buffer_radius = 1.7 # (meter)

# as this radius still cause overlap for small trees or small crowns,  
# those need to be excluded: 
HEIGHTTOP_min <- 15   # (meter) to exclude small trees   
AREACROWN_min <- 10   # (square meter) to exclude small small crown areas

# some crowns show unrealistically high values for tree height or crown area 
# (caused by the crown identification process) and need to be excluded.
HEIGHTTOP_max <- 70   # exclude outliers
AREACROWN_max <- 400  # exclude crown areas that are too large due to processing artefacts

4.2 Set regions to filter treetops

In order to limit calculation time, only treetops within a polygon of 1000m buffer of a convexhull around National Park boundary are considered for the classification. For this markup-demo the project region is further reduced to a rectangle of 145 x 120 m.

# polygon of 1000m buffer of convexhull around National Park boundary
sf_nlp_1000m_boundary_poly <- st_read(paste0(path_input_data,
                    "project_boundary/NLP_ConvexHull_AllBuffer_1000m.geojson"))

# polygon of project area (to reduce processing time in markup-demo)
sf_markup_boundary_poly <- st_read(paste0(path_input_data,
                    "project_boundary/project_boundary_markup.geojson"))

ggplot() + 
  geom_sf(data = st_geometry(sf_nlp_1000m_boundary_poly), 
          aes(colour = "nlp_1000m_boundary")) +
  geom_sf(data = st_geometry(sf_markup_boundary_poly), 
          aes(colour = "markup_boundary"))

4.3 Main loop

The main loop runs through the tiles of tree crown polygons, thus executing the working steps outlined in the introduction. For this markup-demo this loop is not required as only a reduced project region is considered. Hence, the loop is de-activated and the working steps are executed only once.

i <- 1 # the loop-index needs to be fixed as the loop is de-activated 

#for (i in 1:length(list_tile_names)) {

  # Read the attribute data (dbf) of the tree crown tile (shp-file) 
  df_crown_parameter <- read.dbf(list_tile_names[i]) 
  str(df_crown_parameter) # display the data frame structure
## 'data.frame':    22437 obs. of  10 variables:
##  $ ID       : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ WOODTYPE : Factor w/ 1 level "BW": 1 1 1 1 1 1 1 1 1 1 ...
##  $ EASTTOP  : num  3444039 3444023 3444501 3444082 3444106 ...
##  $ NORTHTOP : num  5374000 5374000 5374000 5374000 5374000 ...
##  $ HEIGHTTOP: num  21.4 16.2 18.6 40.5 36.3 ...
##  $ HEIGHTDTM: num  840 850 821 819 810 ...
##  $ AREACROWN: num  25.4 40.9 20.8 69.9 66.9 ...
##  $ RAD1ELLIP: num  3.45 3.02 2.45 4.58 5.11 ...
##  $ RAD2ELLIP: num  2.56 4.41 2.78 4.82 4.57 ...
##  $ DIRELLIP : num  -0.948 0.883 1.422 0.84 0.124 ...
##  - attr(*, "data_types")= chr [1:10] "N" "C" "N" "N" ...
  df_crown_parameter <- df_crown_parameter[,c(1,3:7)] # eliminate unrequired fields  
  head(df_crown_parameter) # display the top lines of the data frame
##   ID EASTTOP NORTHTOP HEIGHTTOP HEIGHTDTM AREACROWN
## 1  0 3444039  5374000 21.350010    840.24  25.41760
## 2  1 3444023  5374000 16.170004    849.59  40.89117
## 3  2 3444501  5374000 18.610003    820.95  20.80000
## 4  3 3444082  5374000 40.490001    819.28  69.92512
## 5  4 3444106  5374000 36.310002    809.81  66.88000
## 6  5 3444653  5374000  4.650006    868.15   3.68000
  # only for demo purpose we plot the crown polygons
  sf_crown_parameter <- st_read(
                  paste0(tools::file_path_sans_ext(list_tile_names[i]),".shp"))
## Reading layer `3444_5373_DSM_04m_br_G200_M50000000' from data source 
##   `Y:\Baumartenklassifikation\R_script\2021_publication\data_markup\data_input\lidar\tree_crown_polygon_shp\3444_5373_DSM_04m_br_G200_M50000000.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 22437 features and 10 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 3443993 ymin: 5372993 xmax: 3445006 ymax: 5374013
## CRS:           NA
  # as shp-file lacks a coordinate reference system (crs), it needs to be set
  st_crs(sf_crown_parameter) = st_crs(sf_nlp_1000m_boundary_poly) 

  # subset polygons to markup_boundary
  sf_crown_parameter <- sf_crown_parameter[sf_markup_boundary_poly,]  
  ggplot()  + 
        geom_sf(data = st_geometry(sf_crown_parameter)) + 
        geom_sf(data = st_geometry(sf_markup_boundary_poly), 
                aes(colour = "markup_boundary", alpha=0.9))

4.3.0.1 Apply the crown property parameters to filter treetops

# Exclude crowns that are too small and too big or high or are outliers
  df_crown_parameter <- subset(df_crown_parameter, 
                           AREACROWN>=AREACROWN_min & AREACROWN<=AREACROWN_max & 
                           HEIGHTTOP>=HEIGHTTOP_min & HEIGHTTOP<=HEIGHTTOP_max )

  # apply the same filter to the polygons and plot the subset
  sf_crown_parameter <- subset(sf_crown_parameter, 
                         AREACROWN>=AREACROWN_min & AREACROWN<=AREACROWN_max & 
                         HEIGHTTOP>=HEIGHTTOP_min & HEIGHTTOP<=HEIGHTTOP_max ) 
  ggplot()  + 
        geom_sf(data = st_geometry(sf_crown_parameter)) + 
        geom_sf(data = st_geometry(sf_markup_boundary_poly), 
                aes(colour = "markup_boundary", alpha=0.9))

#### Apply regions to filter treetops

   # convert crown peak point coordinates to simple feature points
    sf_crown_points <- st_as_sf(x = df_crown_parameter,                         
           coords = c("EASTTOP", "NORTHTOP"),
           crs = st_crs(sf_nlp_1000m_boundary_poly))
    str(sf_crown_points) # display structure of simple feature points
## Classes 'sf' and 'data.frame':   14894 obs. of  5 variables:
##  $ ID       : int  0 1 2 3 4 6 7 8 9 11 ...
##  $ HEIGHTTOP: num  21.4 16.2 18.6 40.5 36.3 ...
##  $ HEIGHTDTM: num  840 850 821 819 810 ...
##  $ AREACROWN: num  25.4 40.9 20.8 69.9 66.9 ...
##  $ geometry :sfc_POINT of length 14894; first list element:  'XY' num  3444039 5374000
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA
##   ..- attr(*, "names")= chr [1:4] "ID" "HEIGHTTOP" "HEIGHTDTM" "AREACROWN"
    # clip points by nlp 1000m buffer boundary polygon
    sf_crown_points <- sf_crown_points[sf_nlp_1000m_boundary_poly,]
    
    # Generate field Feature_ID to join to raster-extract to lidar_metrics
    # as Feature_ID starts at 0 we need to subtract 1 from nrow
    sf_crown_points$Feature_ID <- 
                  as.integer(seq(0, nrow(sf_crown_points)-1), 1) 

    # change the field order (only for cosmetic purpose)
    sf_crown_points <- sf_crown_points[,c(1,6,2,3,4,5)]

    # limit the project area (for this markup only) 
    if(sw_test_run == TRUE) {
      
      sf_crown_points <- sf_crown_points[sf_markup_boundary_poly,]

      ggplot()  +
        ggtitle("Treetop points inside the markdown_boundary")+ 
        geom_sf(data = st_geometry(sf_crown_parameter)) + 
        geom_sf(data = st_geometry(sf_crown_points))+ 
        geom_sf(data = st_geometry(sf_markup_boundary_poly), 
                aes(colour = "markup_boundary", alpha=0.9))
    }

    str(sf_crown_points)
## Classes 'sf' and 'data.frame':   361 obs. of  6 variables:
##  $ ID        : int  13300 13301 13302 13318 13319 13331 13340 13341 13349 13363 ...
##  $ Feature_ID: int  8922 8923 8924 8936 8937 8948 8953 8954 8962 8971 ...
##  $ HEIGHTTOP : num  21 17.2 18 20.7 17.7 ...
##  $ HEIGHTDTM : num  947 950 950 949 951 ...
##  $ AREACROWN : num  39.2 29.4 29.6 42.7 23 ...
##  $ geometry  :sfc_POINT of length 361; first list element:  'XY' num  3444782 5373450
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA
##   ..- attr(*, "names")= chr [1:5] "ID" "Feature_ID" "HEIGHTTOP" "HEIGHTDTM" ...

4.3.0.2 Generate point buffer to extract raster information

   sf_crown_point_buffers <- st_buffer(sf_crown_points, 
                                        dist = buffer_radius) 
   ggplot() +
    ggtitle("Treetop buffers") + 
    geom_sf(data = st_geometry(sf_crown_parameter)) + 
    geom_sf(data = st_geometry(sf_crown_point_buffers))+ 
    geom_sf(data = st_geometry(sf_markup_boundary_poly), 
                      aes(colour = "markup_boundary", alpha=0.9) )

   # Exclude those buffers not falling completely within the markdown_boundary
   # as the rasters are clipped to that boundary, 
   # hence part of the buffer would extract no information 
   sf_crown_point_buffers <- 
     sf_crown_point_buffers[st_within(sf_crown_point_buffers, 
                                      sf_markup_boundary_poly) %>% lengths > 0,]

   ggplot() +
    ggtitle("Treetop buffers falling completely within the markdown_boundary") + 
    geom_sf(data = st_geometry(sf_crown_parameter)) + 
    geom_sf(data = st_geometry(sf_crown_point_buffers))+ 
    geom_sf(data = st_geometry(sf_markup_boundary_poly), 
                      aes(colour = "markup_boundary", alpha=0.9) )

   str(sf_crown_point_buffers)
## Classes 'sf' and 'data.frame':   342 obs. of  6 variables:
##  $ ID        : int  13383 13384 13401 13415 13429 13436 13445 13446 13466 13467 ...
##  $ Feature_ID: int  8988 8989 9002 9011 9022 9027 9034 9035 9051 9052 ...
##  $ HEIGHTTOP : num  18.5 17.5 19.6 18.1 18.6 ...
##  $ HEIGHTDTM : num  944 951 951 951 951 ...
##  $ AREACROWN : num  17.3 29 33.1 35.2 29.9 ...
##  $ geometry  :sfc_POLYGON of length 342; first list element: List of 1
##   ..$ : num [1:121, 1:2] 3444762 3444762 3444762 3444762 3444762 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA
##   ..- attr(*, "names")= chr [1:5] "ID" "Feature_ID" "HEIGHTTOP" "HEIGHTDTM" ...
   # as the number of buffers had been reduced, 
   # the sf_crown_points require adjustment
   sf_crown_points <- sf_crown_points[sf_crown_points$ID  %in% 
                                      sf_crown_point_buffers$ID, ]

4.3.0.3 Calculate ALS (Lidar)-metrics for treetop buffers

See chapter ‘Working Steps’ for details on the ALS data used here. Collect the ASCII-files holding the targets of the treetop buffers. File names indicate the Feature-ID of treetop buffer polygon.

# compile list of ASCII-files holding each the ASL-targets for one tree
# hence the list holds all the tree file names for one tile
list_lidar_targets_file_names <- list.files(
                                      path = list_lidar_targets_dir_names[i], 
                                      pattern = "\\.asc$", full.names = TRUE)
head(list_lidar_targets_file_names)
## [1] "data_markup/data_input/lidar/points_altitude-normalized_asc/3444_5373_ACge10_ACse400_HTge15_HTle70/Rawdata_Polygon0.rwb.asc"    
## [2] "data_markup/data_input/lidar/points_altitude-normalized_asc/3444_5373_ACge10_ACse400_HTge15_HTle70/Rawdata_Polygon1.rwb.asc"    
## [3] "data_markup/data_input/lidar/points_altitude-normalized_asc/3444_5373_ACge10_ACse400_HTge15_HTle70/Rawdata_Polygon10.rwb.asc"   
## [4] "data_markup/data_input/lidar/points_altitude-normalized_asc/3444_5373_ACge10_ACse400_HTge15_HTle70/Rawdata_Polygon100.rwb.asc"  
## [5] "data_markup/data_input/lidar/points_altitude-normalized_asc/3444_5373_ACge10_ACse400_HTge15_HTle70/Rawdata_Polygon1000.rwb.asc" 
## [6] "data_markup/data_input/lidar/points_altitude-normalized_asc/3444_5373_ACge10_ACse400_HTge15_HTle70/Rawdata_Polygon10000.rwb.asc"
# extract the tile_name from the data path
tile_name <- list_lidar_targets_dir_names[i] %>% 
    strsplit("/") %>%
    unlist()  %>%
    tail(n=1)

# as calculation of lidar metrics is time consuming, 
# during test runs read the results from a RDS file 
if (sw_calculate_lidar_metrics == FALSE) {
   
  df_lidar_metrics_tile <- 
     readRDS(paste0(path_output_data, "TileLidarMetrics_", tile_name, ".rds"))
   
  j <- 1 # set index value 

} else {

# loop through all crown polygon Lidar targets files of this tile
for (j in 1:length(list_lidar_targets_file_names)) {    
  
     # extract tree polygon Feature_ID from df_lidar_targets targets file name
     feature_id <- list_lidar_targets_file_names[j]  %>%  
      strsplit("/") %>%   # split path string into list according to "/"
      unlist() %>%        # unlist list into vector
      tail(n=1) %>%       # file name is last element of vector
      parse_number()      # extract tree polygon number from file name 
     
     # only if a record for this feature_id  
     # exists in the sf_crown_points
     if (feature_id %in% sf_crown_points$Feature_ID) {
       
       df_lidar_targets <- read.table(list_lidar_targets_file_names[j], 
                                        header = TRUE, sep = ' ', dec = '.')
       
       # add Feature_ID as additional column to table df_lidar_targets 
       df_lidar_targets$Feature_ID <- feature_id
       
       ### dataframe to collect the aggregated targets parameters 
       #  one record per tree
       #  calculate mean values for intensity, number of targets and 
       #  target number for all points in df_lidar_targets
       #  Feature_ID is required for a JOIN to sf_crown_points
       
       df_lidar_metrics_tree <- summarise_at(df_lidar_targets, 
                                 .vars = c("X.Intensity.", "X.TargetNumber.", 
                                           "X.NumTargets.", "Feature_ID"), 
                                 list(mean))
  
       #  sum number of targets
       df_lidar_metrics_tree$Sum_NumTargets_all <- 
                                            as.numeric(count(df_lidar_targets)) 
  
       #  adjust column names 
       names(df_lidar_metrics_tree) <- c(
                                      "Mean_Intensity_all", 
                                      "Mean_TargetNum_all", 
                                      "Mean_NumTargets_all", 
                                      "Feature_ID", 
                                      "Sum_Targets_all")
       
       ## subset targets of the top 1.5m of the tree
       df_lidar_targets_top150cm <- filter(df_lidar_targets, 
                                    df_lidar_targets$X.PositionH. > 
                                     (max(df_lidar_targets$X.PositionH.) - 1.5))
  
       # for the targets of the top 1.5m of the tree          
       #  calculate mean values for intensity, mean number of targets and 
       #  the mean target number for all targets in df_lidar_targets
       #  Feature_ID is required for a JOIN to df_lidar_metrics_tree
       df_lidar_metrics_tree_top150cm <- summarise_at(df_lidar_targets_top150cm, 
                                 .vars = c("X.Intensity.", "X.TargetNumber.", 
                                           "X.NumTargets.","Feature_ID"), 
                                 list(mean)) 
  
       #  count top 1.5m number of targets of the tree
       df_lidar_metrics_tree_top150cm$Sum_Targets_top150cm <- 
                                    as.numeric(count(df_lidar_targets_top150cm)) 
  
       #  get ratio of number of top 1.5m targets to number of total targets
       df_lidar_metrics_tree_top150cm <- 
              dplyr::mutate(df_lidar_metrics_tree_top150cm, 
                           Ratio_Targets_top150cm_to_all = 
                             Sum_Targets_top150cm / 
                             df_lidar_metrics_tree$Sum_Targets_all) 
       
       #  adjust column names 
       names(df_lidar_metrics_tree_top150cm) <- c(
                                            "Mean_Intensity_top150cm", 
                                            "Mean_TargetNum_top150cm", 
                                            "Mean_NumTargets_top150cm", 
                                            "Feature_ID", 
                                            "Sum_Targets_top150cm", 
                                            "Ratio_Targets_top150cm_to_all")
  
       # JOIN statistics of top 1.5m to main data frame
       df_lidar_metrics_tree <- dplyr::full_join(df_lidar_metrics_tree, 
                                          df_lidar_metrics_tree_top150cm, 
                                          by = "Feature_ID")  %>%
                                dplyr::select(., c("Feature_ID",
                                              "Ratio_Targets_top150cm_to_all")) 
  
       # collect results into a list that stores Lidar metrics results 
       # for all trees of one tile
       list_lidar_metrics_tree[[j]] <- df_lidar_metrics_tree  
       
    } # if (feature_id %in% sf_crown_points$Feature_ID) 
  } # for (j in 1:length(list_lidar_targets_file_names))

  # convert the collect list_lidar_metrics_tree to a data frame
  df_lidar_metrics_tile <- as.data.frame(bind_rows(list_lidar_metrics_tree, .id = NULL))
  str(df_lidar_metrics_tile)
  
  if (sw_save_results_as_rds== TRUE) {
     saveRDS(df_lidar_metrics_tile, file = paste0(path_output_data, 
                                      "TileLidarMetrics_", tile_name, ".rds"))
  } # if

} # else i.e. (sw_calculate_lidar_metrics == TRUE)

# list to collect metrics of all tiles
list_lidar_metrics_all_tiles[[i]] <- df_lidar_metrics_tile

4.3.0.4 Extract raster values using the treetop buffers

# as raster extraction metrics is time consuming, 
# during test runs read the results from a RDS file 
if (sw_extract_rasters == FALSE) {
   
  sf_crown_points <- 
     readRDS(paste0(path_output_data, "TileRasterExtracts_", tile_name, ".rds"))

} else {

          sf_crown_points$Ratio_2015_Red.Mean    <- 
            as.numeric(raster::extract(r_ratio_2015_red,   
                               sf_crown_point_buffers, fun = mean))
          sf_crown_points$Ratio_2015_Green.Mean  <- 
            as.numeric(raster::extract(r_ratio_2015_gre, 
                               sf_crown_point_buffers, fun = mean))
          sf_crown_points$Ratio_2015_Blue.Mean   <- 
            as.numeric(raster::extract(r_ratio_2015_blu,  
                               sf_crown_point_buffers, fun = mean))
          sf_crown_points$Ratio_2015_NIR.Mean    <- 
            as.numeric(raster::extract(r_ratio_2015_nir,   
                               sf_crown_point_buffers, fun = mean))
        
          sf_crown_points$Deri_1_NIR_2015.Mean <- 
            as.numeric(raster::extract(r_deri1_nir_2015, 
                               sf_crown_point_buffers, fun = mean))
        
          sf_crown_points$PCA_1_2015.Mean <- 
            as.numeric(raster::extract( r_pca_1_2015, 
                                sf_crown_point_buffers, fun = mean))

          sf_crown_points$NDVI_2014.Mean <- 
            as.numeric(raster::extract(r_ndvi_2014, 
                               sf_crown_point_buffers, fun = mean))
          sf_crown_points$NDVI_2015.Mean <- 
            as.numeric(raster::extract(r_ndvi_2015, 
                               sf_crown_point_buffers, fun = mean))
          sf_crown_points$NDVI_2014.Min  <- 
            as.numeric(raster::extract(r_ndvi_2014, 
                               sf_crown_point_buffers, fun = min))
  
          sf_crown_points <- 
            dplyr::mutate(sf_crown_points, 
                   NDVI_Diff_2014_2015.Mean = NDVI_2014.Mean - NDVI_2015.Mean )   

        # collect the results of each tile in a list 
        list_raster_extract_all_tiles[[i]] <- sf_crown_points 
         
        if (sw_save_results_as_rds== TRUE) {
           saveRDS(sf_crown_points, file = paste0(path_output_data, 
                                      "TileRasterExtracts_", tile_name, ".rds"))
        } # if
      } # else i.e. (sw_extract_rasters == TRUE)

      str(sf_crown_points)
## Classes 'sf' and 'data.frame':   342 obs. of  17 variables:
##  $ ID                      : int  13383 13384 13401 13415 13429 13436 13445 13446 13466 13467 ...
##  $ Feature_ID              : int  8988 8989 9002 9011 9022 9027 9034 9035 9051 9052 ...
##  $ HEIGHTTOP               : num  18.5 17.5 19.6 18.1 18.6 ...
##  $ HEIGHTDTM               : num  944 951 951 951 951 ...
##  $ AREACROWN               : num  17.3 29 33.1 35.2 29.9 ...
##  $ geometry                :sfc_POINT of length 342; first list element:  'XY' num  3444761 5373447
##  $ Ratio_2015_Red.Mean     : num  68.8 73.7 91.4 90.6 82.2 ...
##  $ Ratio_2015_Green.Mean   : num  57.9 65.8 85 80.3 68.9 ...
##  $ Ratio_2015_Blue.Mean    : num  29.8 21.2 54 35.2 32.3 ...
##  $ Ratio_2015_NIR.Mean     : num  251 247 202 216 234 ...
##  $ Deri_1_NIR_2015.Mean    : num  90 90 90 90 90 ...
##  $ PCA_1_2015.Mean         : num  127 123 125 124 126 ...
##  $ PCA_2_2015.Mean         : num  144 142 140 138 141 ...
##  $ NDVI_2014.Mean          : num  190 189 159 169 175 ...
##  $ NDVI_2015.Mean          : num  193 188 160 165 177 ...
##  $ NDVI_2014.Min           : num  172 169 148 153 148 132 145 132 169 169 ...
##  $ NDVI_Diff_2014_2015.Mean: num  -3.222 0.731 -0.972 4.185 -1.343 ...
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:16] "ID" "Feature_ID" "HEIGHTTOP" "HEIGHTDTM" ...

4.3.0.5 JOIN lidar-metrics to raster-extracted parameters

# the join fields Feature_ID must be of the same type, i.e. integer
df_lidar_metrics_tile$Feature_ID <- 
                         as.integer(df_lidar_metrics_tile$Feature_ID)

# JOIN lidar-metrics to raster-extracted features
df_tile_attributes <- as.data.frame(inner_join(sf_crown_points, 
                                               df_lidar_metrics_tile,
                                               by = "Feature_ID"))

if (sw_save_results_as_rds == TRUE) { # save results of one tile to rds-file
  saveRDS(df_tile_attributes, 
    file = paste0(path_output_data, "TileJoinedParameters_", tile_name, ".rds"))
}

# collect results into a list that stores all results for all trees of one tile
list_attributes_all_tiles[[i]] <- df_tile_attributes

# --- close the main for-loop loop
# } # for
# ----------------

4.4 Apply the Random Forest Model

RF_Final.rds, the result of script 1, is loaded to be able to run the current script independently.

# load the Random Forest model from file 
RF_model <- readRDS(paste0(path_input_data, "model/RF_Final.rds"))

df_tile_attributes$Class <- as.character(predict(RF_model, df_tile_attributes)) 

sf_tile_parameter <- st_as_sf(x = df_tile_attributes,
                             crs = st_crs(sf_nlp_1000m_boundary_poly))
str(sf_tile_parameter)
## Classes 'sf' and 'data.frame':   342 obs. of  19 variables:
##  $ ID                           : int  13383 13384 13401 13415 13429 13436 13445 13446 13466 13467 ...
##  $ Feature_ID                   : int  8988 8989 9002 9011 9022 9027 9034 9035 9051 9052 ...
##  $ HEIGHTTOP                    : num  18.5 17.5 19.6 18.1 18.6 ...
##  $ HEIGHTDTM                    : num  944 951 951 951 951 ...
##  $ AREACROWN                    : num  17.3 29 33.1 35.2 29.9 ...
##  $ Ratio_2015_Red.Mean          : num  68.8 73.7 91.4 90.6 82.2 ...
##  $ Ratio_2015_Green.Mean        : num  57.9 65.8 85 80.3 68.9 ...
##  $ Ratio_2015_Blue.Mean         : num  29.8 21.2 54 35.2 32.3 ...
##  $ Ratio_2015_NIR.Mean          : num  251 247 202 216 234 ...
##  $ Deri_1_NIR_2015.Mean         : num  90 90 90 90 90 ...
##  $ PCA_1_2015.Mean              : num  127 123 125 124 126 ...
##  $ PCA_2_2015.Mean              : num  144 142 140 138 141 ...
##  $ NDVI_2014.Mean               : num  190 189 159 169 175 ...
##  $ NDVI_2015.Mean               : num  193 188 160 165 177 ...
##  $ NDVI_2014.Min                : num  172 169 148 153 148 132 145 132 169 169 ...
##  $ NDVI_Diff_2014_2015.Mean     : num  -3.222 0.731 -0.972 4.185 -1.343 ...
##  $ Ratio_Targets_top150cm_to_all: num  0.0672 0.0737 0.1142 0.1164 0.0942 ...
##  $ Class                        : chr  "AA" "AA" "AA" "AA" ...
##  $ geometry                     :sfc_POINT of length 342; first list element:  'XY' num  3444761 5373447
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:18] "ID" "Feature_ID" "HEIGHTTOP" "HEIGHTDTM" ...

4.5 Export and plot results

# Export result to RDS
if (sw_save_results_as_rds == TRUE) {       
   saveRDS(sf_tile_parameter, 
        file = paste0(path_output_data, "ClassifiedTile_", tile_name, ".rds"))
}

# Export results as GeoPackage
st_write(sf_tile_parameter, delete_layer = TRUE,
              paste0(path_output_data, "ClassifiedTile_", tile_name, ".gpkg"))
## Deleting layer `ClassifiedTile_3444_5373_ACge10_ACse400_HTge15_HTle70' using driver `GPKG'
## Writing layer `ClassifiedTile_3444_5373_ACge10_ACse400_HTge15_HTle70' to data source 
##   `data_markup/data_output/ClassifiedTile_3444_5373_ACge10_ACse400_HTge15_HTle70.gpkg' using driver `GPKG'
## Writing 342 features with 18 fields and geometry type Point.
# display the result
# use the 1.7m treetop bufers, not the points
sf_tile_parameter_buffer <- st_join(sf_crown_point_buffers, 
                             sf_tile_parameter, by = "Feature_ID") 
sf_crown_parameter_buffer <- st_join(sf_crown_parameter, 
                             sf_tile_parameter, by = "ID") 
# set colors for the Class
lab_class <- c("AA Abies alba",
               "DB Deciduous broadleaved",
               "LD Larix decidua", 
               "PA Picea abies",
               "PM Pseudotsuga menziesii",
               "PS Pinus sylvestris", 
               "WD Snags")
col_class <- c("AA"="red",
               "DB"="green",
               "LD"="blue",
               "PA"="grey",
               "PM"="violet",
               "PS"="orange", 
               "WD"="black")
ggplot() +
    ggtitle("Classified treetop buffer and crown polygons") + 
    scale_fill_manual(labels= lab_class, values=col_class) + 
    geom_sf(data=sf_crown_parameter_buffer, 
              aes(fill= factor(Class), alpha=0.4)) +
    geom_sf(data=sf_tile_parameter_buffer, 
              aes(fill= factor(Class), alpha=0.7)) +
    geom_sf(data = st_geometry(sf_markup_boundary_poly), 
              aes(colour = "markup_boundary", alpha=0.1))