# Generated by `rjournal_pdf_article()` using `knitr::purl()`: do not edit by hand
# Please edit li-dodwell-cook.Rmd to modify this file

## ----setup, include=FALSE-----------------------------------------------------
# Document-wide knitr chunk defaults: code and console chatter are hidden,
# nothing is cached, and figures are rendered as centred, full-width,
# high-resolution PNGs. Individual chunks override these where needed.
knitr::opts_chunk$set(
  # code / console output
  echo = FALSE,
  message = FALSE,
  warning = FALSE,
  cache = FALSE,
  # figure rendering
  dev = "png",
  fig.retina = 3,
  fig.align = "center",
  out.width = "100%"
)


## ----libraries----------------------------------------------------------------

# The timeline plot is only drawn correctly with ggbeeswarm 0.6.0, so stop
# early with installation instructions unless exactly that version is
# available. `requireNamespace()` is checked first so that a missing package
# produces the same helpful message instead of a bare "no package called"
# error from `packageVersion()`.
if (!requireNamespace("ggbeeswarm", quietly = TRUE) ||
    packageVersion("ggbeeswarm") != "0.6.0") {
  stop("This file requires `ggbeeswarm 0.6.0` to draw the timeline plot correctly! Please install the `remotes` package and get the correct version of `ggbeeswarm` using `remotes::install_version('ggbeeswarm', version = '0.6.0')`, then try to knit this file.")
}

library(dplyr)
library(readr)
library(ggplot2)
library(spotoroo)
library(ggforce)
library(patchwork)


## ----step2figs, fig.cap="Illustration showing Step 2 of the clustering algorithm on a sample of 20 hotspots in one time window $\\boldsymbol{S}_t$. Initially (a), a hotspot is selected randomly ($\\boldsymbol{P}$) in order to seed a cluster. The circle indicates the maximum neighborhood distance ($adjDist$). Nearby hotspots as shown in red are clustered with $\\boldsymbol{P}$ (b) to initialize list $\\boldsymbol{L}$. The neighborhood is moved following every point in that collected list $\\boldsymbol{L}$ and new observations are added (c), until there are no more points that can be grouped (d). Then a new hotspot is selected external to the existing cluster, and the process is repeated (e). At the end, all the hotspots will be clustered (f).", fig.height = 5, fig.width = 20----

# Text and point sizes shared by every illustration panel; later chunks in
# this file read `font_size` and `point_size` as well.
font_size <- 20
point_size <- 7

# Simulate 20 hotspots as two loose groups of 10. The seed is fixed because
# the point indices hard-coded below (and in later chunks) only make sense for
# this exact draw; the order of the four rnorm() calls must therefore stay
# as it is, since later chunks keep drawing from the same RNG stream.
set.seed(1256)
x <- rnorm(10, mean = 0, sd = 3)
y <- rnorm(10, mean = 0, sd = 2)
x <- c(x, rnorm(10, mean = 5, sd = 2))
y <- c(y, rnorm(10, mean = 5, sd = 3))
points <- data.frame(x = x, y = y)

# Hand-picked members of the cluster that is grown in panels (b)-(d).
step2_cluster <- c(2:5, 7, 11:18)

# Axis-free look shared by all six panels (applied on top of theme_bw()).
step2_theme <- theme(legend.position = "none",
                     axis.line = element_blank(),
                     axis.text.x = element_blank(),
                     axis.text.y = element_blank(),
                     axis.ticks = element_blank(),
                     axis.title.x = element_blank(),
                     axis.title.y = element_blank())

# Draw one panel of the Step 2 illustration.
#
# pts:      data frame with columns `x` and `y` (all hotspots, drawn hollow).
# title:    panel title.
# focus:    row index of the hotspot currently being visited; drawn as a
#           large red dot with the neighbourhood circle around it.
# members:  row indices already collected in the cluster (small red dots).
# linked:   row indices joined to the focus point by a line segment.
# radius:   radius of the neighbourhood circle.
# dot_size, text_size: size of the focus dot and base font size.
#
# Every layer receives its own explicit data frame, so each mark is drawn
# exactly once (a layer with `data = NULL` would inherit the plot data and
# recycle a single coordinate over all of its rows).
step2_panel <- function(pts, title, focus, members = integer(0),
                        linked = integer(0), radius = 3,
                        dot_size = point_size, text_size = font_size) {
  focus_pt <- pts[focus, , drop = FALSE]

  panel <- ggplot() +
    geom_point(data = pts, aes(x, y), shape = 21, size = dot_size / 3)

  if (length(linked) > 0) {
    spokes <- data.frame(x = focus_pt$x,
                         y = focus_pt$y,
                         xend = pts$x[linked],
                         yend = pts$y[linked])
    panel <- panel +
      geom_segment(data = spokes,
                   aes(x = x, y = y, xend = xend, yend = yend))
  }

  if (length(members) > 0) {
    panel <- panel +
      geom_point(data = pts[members, , drop = FALSE],
                 aes(x, y, col = "L"),
                 size = dot_size / 3)
  }

  panel +
    geom_point(data = focus_pt, aes(x, y, col = "L"), size = dot_size) +
    geom_circle(data = focus_pt, aes(x0 = x, y0 = y, r = radius)) +
    coord_fixed() +
    theme_bw(base_size = text_size) +
    step2_theme +
    scale_color_manual(values = "red") +
    labs(col = "", size = "") +
    ggtitle(title)
}

# (a) a random hotspot (row 3) seeds the cluster.
p1 <- step2_panel(points, "Step 2: (a)", focus = 3)

# (b) hotspots inside the circle around the seed are collected.
p2 <- step2_panel(points, "Step 2: (b)", focus = 3,
                  members = c(2, 4, 7, 11, 18),
                  linked = c(2, 4, 7, 11, 18))

# (c) the neighbourhood moves to a collected hotspot (row 7) and picks up
# two more.
p3 <- step2_panel(points, "Step 2: (c)", focus = 7,
                  members = c(2, 3, 4, 11, 18, 5, 15),
                  linked = c(5, 15))

# (d) the cluster is complete; no further hotspot can be reached.
p4 <- step2_panel(points, "Step 2: (d)", focus = 16,
                  members = step2_cluster)

# (e) a hotspot outside the finished cluster (row 6) seeds the next one; the
# highlighted members are all rows not in the first cluster and not 19 or 20.
p5 <- step2_panel(points, "Step 2: (e)", focus = 6,
                  members = setdiff(seq_len(nrow(points)),
                                    c(step2_cluster, 19, 20)))

# Final cluster label of each of the 20 hotspots (reused by the Step 3 plots).
point_labels <- rep(1, 20)
point_labels[step2_cluster] <- 2
point_labels[19] <- 3
point_labels[20] <- 4

# (f) every hotspot drawn as its cluster label.
p6 <- ggplot() +
  geom_text(data = points, aes(x, y, label = point_labels),
            size = point_size) +
  coord_fixed() +
  theme_bw(base_size = font_size) +
  step2_theme +
  ggtitle("Step 2: (f)")

p1 + p2 + p3 + p4 + p5 + p6 + plot_layout(nrow = 1)


## -----------------------------------------------------------------------------
# Add 13 new hotspots (10 near the second group, 3 forming a separate group)
# to the 20 simulated earlier. These draws continue the RNG stream of the
# earlier chunk; no seed is set here, so the chunks must run in file order
# for the hard-coded labels below to match the data.
x <- c(x, rnorm(10, mean = 6, sd = 2))
y <- c(y, rnorm(10, mean = 7, sd = 2))
x <- c(x, rnorm(3, mean = 0, sd = 1))
y <- c(y, rnorm(3, mean = 10, sd = 1))
points_2 <- data.frame(x = x, y = y)

# Hand-assigned labels for the 33 hotspots in this time window.
point_labels2 <- rep(1, 33)
point_labels2[19] <- 2
point_labels2[c(1, 8, 6, 9, 10)] <- 3
point_labels2[c(31, 32, 33)] <- 4

# One row per pair of hotspots that lie closer than 3 units apart, holding the
# end points of the connecting segment. All pairwise distances come from a
# single dist() call: its lower-triangle vector is ordered (1,2), (1,3), ...,
# (2,3), ..., which is exactly the column order of combn(n, 2), so the rows
# appear in the same (i, j) order as a nested i < j loop would produce, without
# growing a data frame inside a loop. With no close pairs the result is a
# zero-row data frame that still has the four columns.
all_con_lines <- local({
  pair_idx <- combn(nrow(points_2), 2)
  is_close <- as.vector(dist(points_2)) < 3
  from_row <- pair_idx[1, is_close]
  to_row <- pair_idx[2, is_close]
  data.frame(x = points_2$x[from_row], xend = points_2$x[to_row],
             y = points_2$y[from_row], yend = points_2$y[to_row])
})


## -----------------------------------------------------------------------------
# Convex hulls of the hand-labelled groups 1, 3 and 4. Each hull index vector
# is closed by repeating its first vertex at the end.
X1 <- points_2[point_labels2 == 1, ]
hull_1 <- chull(X1)
hull_1 <- hull_1[c(seq_along(hull_1), 1)]

X3 <- points_2[point_labels2 == 3, ]
hull_3 <- chull(X3)
hull_3 <- hull_3[c(seq_along(hull_3), 1)]

X4 <- points_2[point_labels2 == 4, ]
hull_4 <- chull(X4)
hull_4 <- hull_4[c(seq_along(hull_4), 1)]

# Step 3 (b): connection segments between neighbouring hotspots, with the 20
# earlier hotspots drawn as their cluster labels (green) and the 13 new ones
# as dots (orange).
p3 <- ggplot() +
  geom_segment(
    data = all_con_lines,
    mapping = aes(x = x, xend = xend, y = y, yend = yend),
    alpha = 0.3
  ) +
  geom_text(
    data = points_2[1:20, ],
    mapping = aes(x, y, label = point_labels),
    col = "seagreen4",
    size = point_size
  ) +
  geom_point(
    data = points_2[21:33, ],
    mapping = aes(x, y),
    col = "salmon3",
    size = point_size/3
  ) +
  coord_fixed() +
  theme_bw(base_size = font_size) +
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  labs(
    title = expression("Step 3: (b)"),
    col = "",
    shape = expression("Hotspots "*" in "*S[t-1])
  )


## -----------------------------------------------------------------------------
# Step 3 (a): the 13 new hotspots as orange dots and the 20 previously
# clustered hotspots as their cluster labels in green.
# The point layer sets its colour as a fixed parameter only. It previously
# also mapped `col` to a 33-element vector on this 13-row layer; ggplot2 drops
# a mapping whose aesthetic is also set as a parameter, so that mapping never
# had any effect and is not carried over.
p2 <- ggplot() +
  geom_point(data = points_2[21:33, ], aes(x, y),
             col = "salmon3", size = point_size / 3) +
  geom_text(data = points_2[1:20, ], aes(x, y, label = point_labels),
            col = "seagreen4", size = point_size) +
  coord_fixed() +
  theme_bw(base_size = font_size) +
  theme(legend.position = "none") +
  theme(axis.line = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  ggtitle("Step 3: (a)")



## ----step3figs, fig.cap = "Illustration of clustering Step 3, which involves combining results from one time window to the next. There are 33 hotspots at $\\boldsymbol{S}_t$, where 20 (green) of them have been previously clustered at $\\boldsymbol{S}_{t-1}$ (Figure 1 f) and 13 (orange) of them are new hotspots. The connected graph shows the clustering in this time window. Hotspots previously clustered at $\\boldsymbol{S}_{t-1}$ keep their cluster labels. The 13 new hotspots are assigned the cluster label of the nearest hotspot. This might mean that a big cluster at $\\boldsymbol{S}_t$ (indicated by the graph) would be split back into two, if it corresponded to two clusters at $\\boldsymbol{S}_{t-1}$ (e.g. clusters 2, 4). New clusters of hotspots are assigned a new label (e.g. cluster 5).", fig.height = 6, fig.width = 18.5, out.width = "100%"----

# Final labels for all 33 hotspots: the 20 earlier labels followed by the
# hand-assigned labels of the 13 new hotspots.
final_labels <- c(point_labels, 4, 2, 4, 4, 2, 2, 2, 4, 4, 2, 5, 5, 5)

# Convex hull of each final cluster, closed by repeating the first vertex.
X1 <- points_2[final_labels == 1, ]
hull_1 <- chull(X1)
hull_1 <- hull_1[c(seq_along(hull_1), 1)]

X2 <- points_2[final_labels == 2, ]
hull_2 <- chull(X2)
hull_2 <- hull_2[c(seq_along(hull_2), 1)]

X3 <- points_2[final_labels == 3, ]
hull_3 <- chull(X3)
hull_3 <- hull_3[c(seq_along(hull_3), 1)]

X4 <- points_2[final_labels == 4, ]
hull_4 <- chull(X4)
hull_4 <- hull_4[c(seq_along(hull_4), 1)]

X5 <- points_2[final_labels == 5, ]
hull_5 <- chull(X5)
hull_5 <- hull_5[c(seq_along(hull_5), 1)]

# Step 3 (c): every hotspot drawn as its final label; the first 20 rows (the
# previously clustered hotspots) are green, the remaining rows orange.
p4 <- ggplot() +
  geom_text(
    data = points_2,
    mapping = aes(x, y, label = final_labels, col = seq_len(33) <= 20),
    size = point_size
  ) +
  scale_color_manual(values = c("salmon3", "seagreen4")) +
  coord_fixed() +
  theme_bw(base_size = font_size) +
  theme(legend.position = "none") +
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  labs(
    title = expression("Step 3: (c)"),
    col = expression("Hotspots "*" in "*S[t-1])
  )

# Panels (a), (b), (c) side by side.
p2 + p3 + p4 + plot_layout(nrow = 1)


## ----message=FALSE------------------------------------------------------------
# Reuse the cached clustering of the `hotspots` data when it exists; otherwise
# run the clustering once and cache it as data/small.rda. Either way the
# object `result` ends up in the workspace.
if (file.exists("data/small.rda")) {
  load("data/small.rda")
} else {
  result <- hotspot_cluster(
    hotspots = hotspots,
    lon = "lon",
    lat = "lat",
    obsTime = "obsTime",
    timeUnit = "h",
    timeStep = 1,
    activeTime = 24,
    adjDist = 3000,
    minPts = 4,
    minTime = 3
  )
  save(result, file = "data/small.rda")
}



## ----echo = TRUE, message=TRUE------------------------------------------------
# Auto-print the clustering result object.
result


## ----echo = TRUE, message=TRUE------------------------------------------------
# Overview of the `hotspots` element of the result, sorted by observation time.
result$hotspots %>% arrange(obsTime) %>% glimpse()


## ----echo = TRUE, message=TRUE------------------------------------------------
# Overview of the `ignition` element of the result.
glimpse(result$ignition)


## ----echo = TRUE, message=TRUE------------------------------------------------
# Auto-print the `setting` element of the result.
result$setting


## ----echo=TRUE, message=TRUE--------------------------------------------------
# Extract hotspots from the result with `noise = TRUE` (presumably keeps the
# hotspots classified as noise -- confirm against the spotoroo documentation),
# then show them sorted by observation time.
all_fires <- extract_fire(result, noise = TRUE)
all_fires %>% arrange(obsTime) %>% glimpse()


## ----echo=TRUE, message=TRUE--------------------------------------------------
# Extract only clusters 1 and 2, with `noise = FALSE`.
fire_1_and_2 <- extract_fire(result, cluster = c(1, 2), noise = FALSE)


## ----demodefplot, echo=TRUE, fig.cap="This is the default plot for visualizing the spatial distribution of clusters. In the results shown there are six clusters, which correspond to six fires, shown using different colors. The black dots indicate the ignition site for each fire.", out.width = "100%", fig.width=8, fig.height=4.9----

# Default plot of the clustering result, drawn on the background returned by
# plot_vic_map().
plot(result, bg = plot_vic_map())


## ----demomovplot, echo=TRUE, fig.cap="This is the fire movement plot for visualizing the fire dynamics. Here there are six clusters, corresponding to six different fires. The path between the ignition point and the end point is drawn with a black line, where the triangle is the ignition point and the circle is the end point. (Note that the aspect ratio of the plot reflects the relative spatial ratio of latitude and longitude.)", fig.width=8, fig.height=5.5----
# Fire movement plot of the same result.
# NOTE(review): `step = 12` presumably sets the time-step interval between
# plotted positions -- confirm against the spotoroo plot documentation.
plot(result, type = "mov", step = 12)


## ----demotimeline, echo=TRUE, fig.cap="This is the timeline plot for providing an overview of the bushfire season. The x-axis is the date and the y-axis is the cluster membership. The observed time of hotspots are shown as dot plots (green). The density plot at the top displays the temporal frequency of fire occurrence over the timeframe. The dot plot at the bottom (orange) shows the observed time of hotspots that are considered to be noise.", out.width = "100%", fig.width=8, fig.height=4----
# Timeline plot of the same result (the plot type that the ggbeeswarm version
# check at the top of this file refers to).
plot(result, type = "timeline")


## -----------------------------------------------------------------------------
# Cluster the full Victorian hotspot data only when no cached result exists.
# The freshly computed objects are removed again so that, in both cases,
# `result` is the object restored from data/VIC_result.rda.
if (!file.exists("data/VIC_result.rda")) {
  him_hotspots <- read_csv("data/VIC_hotspots_raw2.csv")

  result <- hotspot_cluster(
    hotspots = him_hotspots,
    lon = "lon",
    lat = "lat",
    obsTime = "obsTime",
    timeUnit = "h",
    timeStep = 1,
    activeTime = 24,
    adjDist = 3000,
    minPts = 4,
    minTime = 3,
    ignitionCenter = "mean"
  )

  save(result, file = "data/VIC_result.rda")
  rm(result, him_hotspots)
}

load("data/VIC_result.rda")


## ----echo=TRUE,message=TRUE---------------------------------------------------
# Auto-print the Victorian clustering result loaded from data/VIC_result.rda.
result


## ----clusteringfinalresults, fig.cap="The distribution of hotspots (black) and bushfire ignitions (red) in Victoria during 2019-2020 Australian bushfire season. The spatial distribution of the ignition locations suggests that most of the fires were observed in the east of Victoria.", fig.width=8, fig.height=5----
# Default plot with `hotspot = TRUE`, on the plot_vic_map() background.
plot(result, bg = plot_vic_map(), hotspot = TRUE) 


## ----himtimeline, fig.cap="Timeline of fires observed in Victoria during the 2019-2020 Australian bushfire season. Clustered hotspots are shown as dotplots (green). The density display of the timeline shows that most fires started in late December and early January. Noise is shown at the bottom (orange), with the dashed lines indicating the density. This plot shows there is a significant number of hotspots that could be considered to be noise, especially in mid-December. It might also suggest that there are lots of short-lived and spatially constrained fires.", fig.width=8, fig.height=4----
# Timeline plot with monthly breaks and "%b %d, %y" date labels.
plot(result, type = "timeline", mainBreak = "1 month", dateLabel = "%b %d, %y")


## ----firemovem, echo = TRUE, fig.cap="Examining the dynamics of the four most intensive fires in Victoria during the 2019-2020 Australian bushfire season. All of the fires covered similar spatial areas over their lifetimes, but the trajectory was quite different. Fire 163 may have spread in many directions simultaneously over the time period, as indicated by the near constant location of the centroid.", fig.width=8, fig.height=3.5----

# Movement plot for the four rows of `result$ignition` with the largest
# `obsInCluster` values.
# NOTE(review): order() returns row positions, which are passed as cluster
# ids -- this assumes row k of `result$ignition` describes cluster k; confirm.
# `[1:4]` would also yield NA entries if there were fewer than four clusters.
plot(result, 
     type = "mov", 
     cluster = order(result$ignition$obsInCluster,
                     decreasing = TRUE)[1:4], 
     step = 12, 
     bg = plot_vic_map())


## ----vis1, fig.cap="Parameter tuning plots, where the best choice of parameter is at a large drop in the percentage of noise points. Here, these are at $adjDist = 3000$ and $activeTime = 24$. ", fig.height=5, fig.width=8----
# Run the grid search script only when its output file is missing, then read
# the grid of results.
if (!file.exists("data/grid.csv")) {
  source("scripts/grid_search.R")
}
mygrid <- read_csv("data/grid.csv")

# (a) noise_prop against adjDist (shown in km), one line per activeTime value.
p1 <- ggplot(
  mygrid,
  aes(adjDist/1000, noise_prop, col = factor(activeTime))
) +
  geom_line() +
  scale_x_continuous(breaks = seq(1, 10, 1)) +
  scale_color_brewer(palette = "Paired") +
  theme_bw() +
  theme(legend.position = "bottom") +
  labs(
    title = "(a)",
    x = "adjDist (km)",
    y = "Percentage of noise points",
    col = "activeTime"
  )

# (b) noise_prop against activeTime, one line per adjDist value, restricted to
# rows with adjDist above 2000.
p2 <- ggplot(
  filter(mygrid, adjDist > 2000),
  aes(activeTime, noise_prop, col = factor(adjDist))
) +
  geom_line() +
  scale_x_continuous(breaks = seq(6, 48, 6)) +
  scale_color_brewer(palette = "Paired") +
  theme_bw() +
  theme(legend.position = "bottom") +
  labs(
    title = "(b)",
    x = "activeTime (hours)",
    y = "Percentage of noise points",
    col = "adjDist"
  )

p1 + p2

