Financial Time Series Models(ARCH/GARCH)

Author

zoo un park

Introduction

The focus of this section is on building financial time series models, specifically for the KOSPI Index and the KRW/USD spot exchange rate. The aim of this project is to examine the influence of the United States on the South Korean economy. More specifically, in this section the goal is to examine how dollar exchange rate risk affects the Korean economy.

The KOSPI Index serves as the main equity index for the Korean financial market and visibly reflects the overall state of Korea’s economy. The exchange rate is a very unique financial asset: it behaves like an equity asset, but is also closely related to the policy rate. Because the exchange rate reflects the value of a currency, governments use it as one of the indicators when deciding their policy rate. Since one variable (the KOSPI) reflects the pure market condition and the other (the exchange rate) contains information about currency value and monetary policy, together they form a strong set of indicators for this project.

The previous section (on multivariate time series models) focused on interactions among variables and their effects. This section instead focuses on the volatility of the variables. As financial assets, the KOSPI Index and the exchange rate are heavily influenced by various factors, which leads to volatility clustering: periods of high volatility tend to be followed by similarly turbulent periods, while calm periods also cluster together. In other words, volatility changes over time depending on past shocks and past variance. Therefore, the focus of this section is to examine volatility and its patterns by applying ARCH/GARCH models. Using these models, the aim is to understand how the volatility of the KOSPI Index and the KRW/USD exchange rate has evolved over time and how major economic crises have affected them. From this analysis, it will be possible to gain deeper intuition about how the value of the Korean won against the U.S. dollar impacts the Korean economy, particularly from a risk management perspective. For further research, the aim is to derive implications for constructing investment portfolios for effective risk management.

Code
library(fGarch)
library(tidyverse)
library(ggplot2)
library(forecast)
library(astsa) 
library(xts)
library(zoo)
library(tseries)
library(fpp2)
library(fma)
library(lubridate)
library(tidyverse)
library(TSstudio)
library(tidyquant)
library(plotly)
library(readr)
library(kableExtra)
library(knitr)
library(patchwork)
library(vars)
library(here)
library(tsibble)
library(dplyr)
# calling data

#KR base rate
bok<- read.csv("../data/interest/bok.csv")%>%
  rename(Date = date)
bok$Date<- as.Date(bok$Date)
bok <- bok %>%
  mutate(Date = floor_date(Date, unit = "quarter")) %>%
  group_by(Date) %>%
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)), .groups = "drop")


# FED rate 

usr<- read.csv("../data/interest/us_rate.csv")%>%
  rename(Date = observation_date,rate = DFF)
usr$Date<- as.Date(usr$Date)
usr<- usr %>%
  mutate(Date = floor_date(Date, unit = "quarter")) %>%
  group_by(Date) %>%
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)), .groups = "drop")

# KRW/USD rate

Start_date <- as.Date("1996-12-11")

kr <- read.csv("../data/fx rate/kor.csv")

kr <- kr %>%
  transmute(
    Date = as.Date(observation_date),
    krw  = DEXKOUS,
    usd  = 1 / DEXKOUS
  ) %>%
  filter(!is.na(Date)) %>%
  arrange(Date) %>%
  distinct(Date, .keep_all = TRUE) %>%
  # drop any rows before 1996-12-11 (optional but usually desired)
  filter(Date >= Start_date) %>%
  # create a complete daily sequence starting from 1996-12-11
  complete(Date = seq(Start_date, max(Date), by = "day")) %>%
  fill(krw, usd, .direction = "down")


# USD index 

usd <- read_csv("../data/fx rate/usd_index.csv", show_col_types = FALSE)
usd <- usd %>%
  transmute(
    Date = as.Date(Date),
    usd_index = Close
  ) %>%
  filter(!is.na(Date)) %>%
  arrange(Date) %>%
  distinct(Date, .keep_all = TRUE) %>%
  complete(Date = seq(min(Date), max(Date), by = "day")) %>%
  fill(usd_index, .direction = "down")


# Yield data 

us_yield  <- read.csv(here("data/yield", "us_yield.csv"))
kor_yield   <- read.csv(here("data/yield", "kor_yield.csv"))


us_yield$Date  <- as.Date(us_yield$Date)
kor_yield$Date <- as.Date(kor_yield$Date)

colnames(us_yield)[colnames(us_yield)  == "X3Y"]  <- "US_3Y"
colnames(us_yield)[colnames(us_yield)  == "X10Y"] <- "US_10Y"
colnames(kor_yield)[colnames(kor_yield) == "X3Y"]  <- "KR_3Y"
colnames(kor_yield)[colnames(kor_yield) == "X10Y"] <- "KR_10Y"

us_yield  <- us_yield[,  c("Date", "US_3Y", "US_10Y")]
kor_yield <- kor_yield[, c("Date", "KR_3Y", "KR_10Y")]

us_yield  <- us_yield  %>% arrange(Date) %>% fill(US_3Y, US_10Y, .direction = "down")
kor_yield <- kor_yield %>% arrange(Date) %>% fill(KR_3Y, KR_10Y, .direction = "down")


df <- merge(us_yield, kor_yield, by = "Date")

df$US_3m10  <- df$US_3Y - df$US_10Y
df$KR_3m10  <- df$KR_3Y - df$KR_10Y
df$US_KR_3Y <- df$US_3Y - df$KR_3Y
df$US_KR_10Y<- df$US_10Y - df$KR_10Y


# S&P500 data
sp500 <- read_csv("../data/stock/sp500.csv")


# KOSPI data
kospi <- read_csv("../data/stock/kospi.csv")
kospi$Date <- as.Date(kospi$Date)

# VIX data(added for the modeling)
vix<- read_csv("../data/stock/vix.csv")%>%
  mutate(Date = ymd(Date)) %>%
  arrange(Date) %>%
  fill(vix, .direction = "down")


# extract spreads from df

us_spreads <- df %>%
  dplyr::select(Date, US_3m10) %>%
  rename(us_spread = US_3m10) %>%
  mutate(Date = as.Date(Date))

kor_spreads <- df %>%
  dplyr::select(Date, KR_3m10) %>%
  rename(kr_spread = KR_3m10) %>%
  mutate(Date = as.Date(Date))

us_kr_spreads_3y <- df %>%
  dplyr::select(Date, US_KR_3Y) %>%
  mutate(Date = as.Date(Date))

us_kr_spreads_10y <- df %>%
  dplyr::select(Date, US_KR_10Y) %>%
  mutate(Date = as.Date(Date))

Finance time series model(KOSPI)

Time series plot

Code
sp_plot <- plot_ly(
  data = kospi,
  x = ~Date,
  type = "candlestick",
  open = ~Open,
  high = ~High,
  low = ~Low,
  close = ~Close,
  name = "KOSPI Index"
) %>%
  layout(
    title = "KOSPI Index",
    yaxis = list(title = "KOSPI index"),
    xaxis = list(
      title = "Date",
      tickformat = "%Y",
      dtick = "M12"
    ),
    legend = list(x = 0.1, y = 0.9)
  )

sp_plot
Code
gg <- ggplot(kospi, aes(x = Date, y = log(Close))) +   
  geom_line(color = 'skyblue') +  
  labs(x = "Year", title = "KOSPI Index(Log transformed)")

plotly_gg <- ggplotly(gg)
plotly_gg
Code
kospi_close<- kospi$Close
kospi_ts <- ts(kospi_close,
          start = decimal_date(as.Date("1996-12-11")),
          frequency = 252)

kospi_return<- diff(log(kospi_ts))

autoplot(kospi_return) +
  ggtitle("Daily Log Returns of KOSPI Index") +
  xlab("Time") +
  ylab("Log Return")

As discussed in the univariate and multivariate time series model sections, the log transformation performed well for the KOSPI data, so it was applied to the KOSPI closing prices. The first plot is a candlestick chart of the KOSPI Index, and the second is the log-transformed KOSPI data. It is evident that applying the log transformation reduces variability.

The third plot shows the log returns of the KOSPI Index. In this plot, volatility clustering is clearly observed: for most periods, the values are centered near zero, but in certain periods the series exhibits high volatility before returning to calmer behavior. This suggests that the data are potentially appropriate for modeling with the ARCH/GARCH family of models.

ACF & PACF plot

Code
library(patchwork)
kospi_acf <- ggAcf(kospi_return,lag.max = 30)+ggtitle("ACF Plot of KOSPI Index") + theme_bw() +
  geom_segment(lineend = "butt", color = "orange") +
    geom_hline(yintercept = 0, color = "orange") 
kospi_pacf <- ggPacf(kospi_return,lag.max = 30)+ggtitle("PACF Plot of KOSPI Index") + theme_bw()+
  geom_segment(lineend = "butt", color = "skyblue") +
    geom_hline(yintercept = 0, color = "skyblue") 

kospi_acf/kospi_pacf

From the plots, KOSPI Index data performs well in terms of autocorrelation by applying log transformation, However there are still some lag points showing autocorrelation.

Code
ggAcf(abs(kospi_return), lag.max = 30) +
  ggtitle("ACF plot of Absolute KOSPI Index Returns") +
  xlab("Lag") +
  ylab("Autocorrelation") +
  theme_minimal()

Code
ggAcf(kospi_return^2, lag.max = 30) +
  ggtitle("ACF plot of Squared KOSPI Index Returns") +
  xlab("Lag") +
  ylab("Autocorrelation") +
  theme_minimal()

Code
# Load required package
library(FinTS)

# Perform ARCH LM Test on Bitcoin returns
arch_test_result <- ArchTest(kospi_return, lags = 1, demean = TRUE)

# Print results
print(arch_test_result)

    ARCH LM-test; Null hypothesis: no ARCH effects

data:  kospi_return
Chi-squared = 414.59, df = 1, p-value < 2.2e-16

p-vlaue is smaller than 0.05(default criterion), indicating rejecting null hypothesis, concluding strong evidence for the presence of ARCH(1) effects in the data.

Two plots for applying absolute value and squaring to KOSPI Index shows significant autocorrelation throughout multiple lags, compare to log return. This implies that the returns are not independently and identically distributed, and gives evidence for need to apply conditional variance models such as ARCH or GARCH to capture the underlying time-varying volatility structure.

Modeling Approach 1: ARIMA +GARCH model

Code
library(patchwork)

kospi_acf <- ggAcf(kospi_ts,lag.max = 30)+ggtitle("ACF Plot of KOSPI Index") + theme_bw() +
  geom_segment(lineend = "butt", color = "orange") +
    geom_hline(yintercept = 0, color = "orange") 
kospi_pacf <- ggPacf(kospi_ts,lag.max = 30)+ggtitle("PACF Plot of KOSPI Index") + theme_bw()+
  geom_segment(lineend = "butt", color = "skyblue") +
    geom_hline(yintercept = 0, color = "skyblue") 

kospi_acf/kospi_pacf

Code
kospi_acf_log <- ggAcf(kospi_return,lag.max = 30)+ggtitle("ACF Plot of KOSPI Index return") + theme_bw() +
  geom_segment(lineend = "butt", color = "orange") +
    geom_hline(yintercept = 0, color = "orange") 
kospi_pacf_log <- ggPacf(kospi_return,lag.max = 30)+ggtitle("PACF Plot of KOSPI Index return") + theme_bw()+
  geom_segment(lineend = "butt", color = "skyblue") +
    geom_hline(yintercept = 0, color = "skyblue") 

kospi_acf_log/kospi_pacf_log

The ACF plot shows significant autocorrelation across several lags. In the PACF plot, there is a strong autocorrelation at lag 1, followed by a sharp drop, indicating almost no autocorrelation after lag 1. These plots suggest the presence of dependencies between past and current price movements, and that the series is stationary.

For the KOSPI index returns, the ACF and PACF plots show that most lags lie within the significance bounds; however, there are a few spikes at certain lags, such as lag 5. For manual model selection, a search range from 0 to 5 therefore seems appropriate.

Both plots exhibit similar patterns, indicating that conditional heteroskedasticity is present in the return series. Thus, applying models from the ARCH/GARCH family is appropriate.

Code
ARIMA.c <- function(p_min, p_max, q_min, q_max, data) {
  results <- matrix(NA, nrow = 0, ncol = 6)
  colnames(results) <- c("p", "d", "q", "AIC", "BIC", "AICc")

  for (p in p_min:p_max) {
    for (q in q_min:q_max) {
      for (d in 0:2) {
        if ((p + d + q) <= 12) {  # Complexity constraint
          fit <- Arima(data, order = c(p, d, q))
          metrics <- c(p, d, q, fit$aic, fit$bic, fit$aicc)
          results <- rbind(results, metrics)
        }
      }
    }
  }

  # Convert to data frame
  results_df <- as.data.frame(results)
  colnames(results_df) <- c("p", "d", "q", "AIC", "BIC", "AICc")
  return(results_df)
}

highlight_output = function(output, type="ARIMA") {
    highlight_row <- c(which.min(output$AIC), which.min(output$BIC), which.min(output$AICc))
    knitr::kable(output, align = 'c', caption = paste("Comparison of", type, "Models")) %>%
    kable_styling(full_width = FALSE, position = "center") %>%
    row_spec(highlight_row, bold = TRUE, background = "#FFFF99")  # Highlight row in yellow
}

output <- ARIMA.c(p_min = 0, p_max = 5, q_min = 0, q_max = 5, data = log(kospi_ts))
highlight_output(output)
Comparison of ARIMA Models
p d q AIC BIC AICc
metrics 0 0 0 11970.465 11984.200 11970.466
metrics.1 0 1 0 -38258.268 -38251.401 -38258.268
metrics.2 0 2 0 -33729.271 -33722.404 -33729.271
metrics.3 0 0 1 2403.595 2424.198 2403.599
metrics.4 0 1 1 -38278.236 -38264.501 -38278.234
metrics.5 0 2 1 -38242.423 -38228.689 -38242.422
metrics.6 0 0 2 -5751.991 -5724.521 -5751.986
metrics.7 0 1 2 -38281.585 -38260.983 -38281.581
metrics.8 0 2 2 -38262.354 -38241.752 -38262.350
metrics.9 0 0 3 -12170.494 -12136.157 -12170.486
metrics.10 0 1 3 -38280.025 -38252.556 -38280.019
metrics.11 0 2 3 -38265.719 -38238.251 -38265.714
metrics.12 0 0 4 -16801.609 -16760.404 -16801.597
metrics.13 0 1 4 -38283.929 -38249.592 -38283.920
metrics.14 0 2 4 -38264.166 -38229.830 -38264.158
metrics.15 0 0 5 -20404.824 -20356.752 -20404.808
metrics.16 0 1 5 -38289.156 -38247.952 -38289.144
metrics.17 0 2 5 -38268.096 -38226.893 -38268.084
metrics.18 1 0 0 -38253.069 -38232.466 -38253.065
metrics.19 1 1 0 -38276.983 -38263.248 -38276.981
metrics.20 1 2 0 -35389.618 -35375.884 -35389.617
metrics.21 1 0 1 -38273.196 -38245.727 -38273.191
metrics.22 1 1 1 -38280.697 -38260.095 -38280.694
metrics.23 1 2 1 -38261.101 -38240.500 -38261.098
metrics.24 1 0 2 -38276.458 -38242.121 -38276.450
metrics.25 1 1 2 -38279.679 -38252.210 -38279.673
metrics.26 1 2 2 -38264.810 -38237.341 -38264.804
metrics.27 1 0 3 -38274.873 -38233.669 -38274.861
metrics.28 1 1 3 -38278.125 -38243.788 -38278.116
metrics.29 1 2 3 -38263.050 -38228.715 -38263.042
metrics.30 1 0 4 -38278.691 -38230.619 -38278.675
metrics.31 1 1 4 -38285.875 -38244.672 -38285.864
metrics.32 1 2 4 -38262.293 -38221.090 -38262.281
metrics.33 1 0 5 -38283.843 -38228.904 -38283.823
metrics.34 1 1 5 -38287.200 -38239.129 -38287.185
metrics.35 1 2 5 -38270.071 -38222.001 -38270.055
metrics.36 2 0 0 -38271.935 -38244.465 -38271.929
metrics.37 2 1 0 -38281.525 -38260.923 -38281.522
metrics.38 2 2 0 -36208.321 -36187.719 -36208.317
metrics.39 2 0 1 -38275.584 -38241.246 -38275.575
metrics.40 2 1 1 -38279.459 -38251.990 -38279.453
metrics.41 2 2 1 -38265.661 -38238.192 -38265.655
metrics.42 2 0 2 -38274.514 -38233.310 -38274.503
metrics.43 2 1 2 -38282.616 -38248.280 -38282.608
metrics.44 2 2 2 -38260.420 -38226.085 -38260.412
metrics.45 2 0 3 -38272.524 -38224.452 -38272.508
metrics.46 2 1 3 -38281.098 -38239.894 -38281.086
metrics.47 2 2 3 -38260.817 -38219.614 -38260.805
metrics.48 2 0 4 -38280.540 -38225.601 -38280.520
metrics.49 2 1 4 -38282.392 -38234.321 -38282.376
metrics.50 2 2 4 -38259.867 -38211.797 -38259.851
metrics.51 2 0 5 -38281.788 -38219.981 -38281.763
metrics.52 2 1 5 -38286.645 -38231.707 -38286.625
metrics.53 2 2 5 -38263.238 -38208.301 -38263.218
metrics.54 3 0 0 -38276.387 -38242.050 -38276.378
metrics.55 3 1 0 -38279.786 -38252.317 -38279.781
metrics.56 3 2 0 -36587.005 -36559.537 -36587.000
metrics.57 3 0 1 -38273.445 -38232.240 -38273.433
metrics.58 3 1 1 -38282.853 -38248.517 -38282.845
metrics.59 3 2 1 -38263.927 -38229.591 -38263.918
metrics.60 3 0 2 -38271.534 -38223.462 -38271.518
metrics.61 3 1 2 -38281.577 -38240.374 -38281.566
metrics.62 3 2 2 -38267.030 -38225.827 -38267.018
metrics.63 3 0 3 -38269.656 -38214.716 -38269.635
metrics.64 3 1 3 -38290.447 -38242.376 -38290.431
metrics.65 3 2 3 -38257.855 -38209.785 -38257.840
metrics.66 3 0 4 -38276.680 -38214.873 -38276.655
metrics.67 3 1 4 -38290.211 -38235.273 -38290.191
metrics.68 3 2 4 -38259.447 -38204.510 -38259.427
metrics.69 3 0 5 -38277.020 -38208.346 -38276.989
metrics.70 3 1 5 -38288.603 -38226.797 -38288.577
metrics.71 3 2 5 -38266.888 -38205.084 -38266.863
metrics.72 4 0 0 -38274.620 -38233.415 -38274.608
metrics.73 4 1 0 -38285.341 -38251.005 -38285.333
metrics.74 4 2 0 -36840.327 -36805.992 -36840.319
metrics.75 4 0 1 -38272.429 -38224.357 -38272.413
metrics.76 4 1 1 -38287.210 -38246.006 -38287.198
metrics.77 4 2 1 -38269.509 -38228.306 -38269.497
metrics.78 4 0 2 -38276.488 -38221.549 -38276.468
metrics.79 4 1 2 -38292.479 -38244.408 -38292.463
metrics.80 4 2 2 -38271.481 -38223.411 -38271.465
metrics.81 4 0 3 -38268.619 -38206.812 -38268.593
metrics.82 4 1 3 -38290.669 -38235.731 -38290.649
metrics.83 4 2 3 -38269.546 -38214.609 -38269.526
metrics.84 4 0 4 -38267.221 -38198.547 -38267.190
metrics.85 4 1 4 -38289.035 -38227.230 -38289.010
metrics.86 4 2 4 -38263.447 -38201.643 -38263.421
metrics.87 4 0 5 -38283.278 -38207.736 -38283.241
metrics.88 4 1 5 -38286.720 -38218.047 -38286.689
metrics.89 4 2 5 -38270.730 -38202.058 -38270.699
metrics.90 5 0 0 -38280.076 -38232.004 -38280.061
metrics.91 5 1 0 -38290.263 -38249.060 -38290.252
metrics.92 5 2 0 -37080.957 -37039.755 -37080.946
metrics.93 5 0 1 -38281.862 -38226.923 -38281.842
metrics.94 5 1 1 -38288.239 -38240.168 -38288.223
metrics.95 5 2 1 -38274.462 -38226.392 -38274.447
metrics.96 5 0 2 -38287.111 -38225.304 -38287.085
metrics.97 5 1 2 -38287.995 -38233.056 -38287.974
metrics.98 5 2 2 -38265.888 -38210.950 -38265.867
metrics.99 5 0 3 -38285.862 -38217.188 -38285.831
metrics.100 5 1 3 -38288.605 -38226.799 -38288.580
metrics.101 5 2 3 -38271.437 -38209.632 -38271.411
metrics.102 5 0 4 -38284.741 -38209.200 -38284.704
metrics.103 5 1 4 -38286.749 -38218.077 -38286.718
metrics.104 5 2 4 -38272.962 -38204.290 -38272.931
metrics.105 5 0 5 -38281.259 -38198.850 -38281.215
metrics.106 5 1 5 -38293.230 -38217.690 -38293.193
metrics.107 5 2 5 -38264.398 -38188.860 -38264.361

From the table, arima(0,1,1) has lowest BIC and arima(5,1,5) has lowest AIC and AICc

Code
auto_model1<-auto.arima(log(kospi_ts))
auto_model1
Series: log(kospi_ts) 
ARIMA(2,1,2) with drift 

Coefficients:
         ar1     ar2      ma1      ma2  drift
      0.5565  0.0722  -0.5026  -0.1367  2e-04
s.e.  0.2566  0.2331   0.2544   0.2287  2e-04

sigma^2 = 0.0002655:  log likelihood = 19146.99
AIC=-38281.98   AICc=-38281.97   BIC=-38240.78

auto.arima() suggest ARIMA(2,1,2) model

Code
kospi_data<-log(kospi_ts)
kospi_output1 <- capture.output(sarima(kospi_data, 0, 1, 1))

Code
start_line <- grep("Coefficients", kospi_output1) 
end_line <- length(kospi_output1)  
cat(kospi_output1[start_line:end_line], sep = "\n")
Coefficients: 
         Estimate     SE t.value p.value
ma1        0.0571 0.0122  4.6839  0.0000
constant   0.0002 0.0002  1.1237  0.2612

sigma^2 estimated as 0.0002657487 on 7094 degrees of freedom 
 
AIC = -5.394236  AICc = -5.394236  BIC = -5.391333 
 
Code
kospi_output2 <- capture.output(sarima(kospi_data, 5, 1, 5))

Code
start_line <- grep("Coefficients", kospi_output2) 
end_line <- length(kospi_output2)  
cat(kospi_output2[start_line:end_line], sep = "\n")
Coefficients: 
         Estimate     SE t.value p.value
ar1       -0.1215 0.3386 -0.3588  0.7198
ar2       -0.2424    NaN     NaN     NaN
ar3       -0.3047 0.1665 -1.8306  0.0672
ar4       -0.3587    NaN     NaN     NaN
ar5       -0.4073 0.3252 -1.2525  0.2104
ma1        0.1737 0.3328  0.5219  0.6018
ma2        0.2205    NaN     NaN     NaN
ma3        0.3098 0.1597  1.9394  0.0525
ma4        0.3444    NaN     NaN     NaN
ma5        0.3908 0.3151  1.2405  0.2148
constant   0.0002 0.0002  1.1625  0.2451

sigma^2 estimated as 0.0002647793 on 7085 degrees of freedom 
 
AIC = -5.395351  AICc = -5.395345  BIC = -5.383737 
 
Code
kospi_output3 <- capture.output(sarima(kospi_data, 2, 1, 2))

Code
start_line <- grep("Coefficients", kospi_output3) 
end_line <- length(kospi_output3)  
cat(kospi_output3[start_line:end_line], sep = "\n")
Coefficients: 
         Estimate     SE t.value p.value
ar1        0.5565 0.2566  2.1684  0.0302
ar2        0.0722 0.2331  0.3096  0.7569
ma1       -0.5026 0.2544 -1.9756  0.0482
ma2       -0.1367 0.2287 -0.5977  0.5500
constant   0.0002 0.0002  1.2906  0.1969

sigma^2 estimated as 0.0002653563 on 7091 degrees of freedom 
 
AIC = -5.394868  AICc = -5.394867  BIC = -5.389061 
 

Three models were tested: ARIMA(0,1,1), ARIMA(5,1,5), and ARIMA(2,1,2).

From the model diagnostics for these three candidates, the results are not fully satisfactory. Therefore, to decide which model to use, I compare their AIC and BIC values as well as the significance of the coefficients. Based on these criteria, I choose the ARIMA(0,1,1) model, since it has the lowest BIC value.

residual fit

Code
arima_fit1 <- Arima(log(kospi_ts), order = c(0,1,1), include.drift = TRUE)
res_arima1<- residuals(arima_fit1)

kospi_res_acf <- ggAcf(res_arima1,lag.max = 30)+ggtitle("ACF Plot of ARIMA(0,1,1) Residuals") 

kospi_res_pacf <- ggPacf(res_arima1,lag.max = 30)+ggtitle("PACF Plot of ARIMA(0,1,1) Residuals") 

kospi_res_acf/kospi_res_pacf

Code
arima_fit1 <- Arima(log(kospi_ts), order = c(0,1,1), include.drift = TRUE)
res_arima1<- residuals(arima_fit1)

kospi_res_acf <- ggAcf(res_arima1^2,lag.max = 30)+ggtitle("ACF Plot of ARIMA(0,1,1) Residuals") 

kospi_res_pacf <- ggPacf(res_arima1^2,lag.max = 30)+ggtitle("PACF Plot of ARIMA(0,1,1) Residuals") 

kospi_res_acf/kospi_res_pacf

From the ACF and PACF plots of the residuals and squared residuals for the ARIMA(0,1,1) model, the ACF of the squared residuals shows noticeable spikes up to lag 5, followed by a gradual decline. This suggests the presence of an ARCH effect, indicating time-varying volatility in the data. Based on these plots, an appropriate range for manual model search is p,q both in 0:5 range.

Code
library(tseries)
library(knitr)
library(kableExtra)

# Initialize list to store models and track p, q
models <- list()
pq_combinations <- data.frame(p = integer(), q = integer(), AIC = numeric())
cc <- 1

# Fit GARCH(p, q) models for p = 1:7 and q = 1:7
for (p in 0:5) {
  for (q in 0:5) {
    fit <- tryCatch(
      garch(res_arima1, order = c(q, p), trace = FALSE),
      error = function(e) NULL
    )
    if (!is.null(fit)) {
      models[[cc]] <- fit
      pq_combinations <- rbind(
        pq_combinations,
        data.frame(p = p, q = q, AIC = AIC(fit))
      )
      cc <- cc + 1
    }
  }
}

# Round AIC for presentation
pq_combinations$AIC <- round(pq_combinations$AIC, 3)

# Identify row with minimum AIC
highlight_row <- which.min(pq_combinations$AIC)

# Create formatted table
pq_combinations %>%
  kable(
    caption = "AIC Comparison for GARCH(p, q) Models",
    col.names = c("ARCH Order (p)", "GARCH Order (q)", "AIC"),
    align = c("c", "c", "c"),
    booktabs = TRUE
  ) %>%
  kable_styling(
    full_width = FALSE,
    position = "center",
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  row_spec(highlight_row, bold = TRUE, background = "#FFF59D")
AIC Comparison for GARCH(p, q) Models
ARCH Order (p) GARCH Order (q) AIC
0 1 -38279.50
0 2 -38272.96
0 3 -38264.56
0 4 -38258.07
0 5 -38250.50
1 0 -39310.73
1 1 -41741.28
1 2 -41735.45
1 3 -41720.48
1 4 -41681.63
1 5 -41646.06
2 0 -40159.82
2 1 -41735.07
2 2 -41733.12
2 3 -41754.15
2 4 -41727.00
2 5 -41719.92
3 0 -40581.16
3 1 -41630.17
3 2 -41737.87
3 3 -41702.15
3 4 -41691.90
3 5 -41706.77
4 0 -40893.89
4 1 -41473.17
4 2 -41587.73
4 3 -41620.88
4 4 -41664.12
4 5 -41722.97
5 0 -41103.55
5 1 -41552.01
5 2 -41585.24
5 3 -41599.98
5 4 -41613.78
5 5 -41712.33
Code
cat("\nSummary for GARCH(2,3):\n")

Summary for GARCH(2,3):
Code
garch_model <- garchFit(~ garch(2, 3), data = res_arima1, trace = FALSE)
summary(garch_model)

Title:
 GARCH Modelling 

Call:
 garchFit(formula = ~garch(2, 3), data = res_arima1, trace = FALSE) 

Mean and Variance Equation:
 data ~ garch(2, 3)
<environment: 0x1374353c0>
 [data = res_arima1]

Conditional Distribution:
 norm 

Coefficient(s):
        mu       omega      alpha1      alpha2       beta1       beta2  
1.0481e-05  2.3678e-06  9.1156e-02  6.7207e-02  4.1120e-01  1.0000e-08  
     beta3  
4.2549e-01  

Std. Errors:
 based on Hessian 

Error Analysis:
        Estimate  Std. Error  t value Pr(>|t|)    
mu     1.048e-05   1.234e-04    0.085   0.9323    
omega  2.368e-06   5.141e-07    4.606 4.11e-06 ***
alpha1 9.116e-02   1.291e-02    7.060 1.66e-12 ***
alpha2 6.721e-02   2.354e-02    2.855   0.0043 ** 
beta1  4.112e-01   2.247e-01    1.830   0.0672 .  
beta2  1.000e-08   3.332e-01    0.000   1.0000    
beta3  4.255e-01   1.594e-01    2.670   0.0076 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log Likelihood:
 20881.73    normalized:  2.942332 

Description:
 Thu Nov 20 15:29:25 2025 by user:  


Standardised Residuals Tests:
                                  Statistic   p-Value
 Jarque-Bera Test   R    Chi^2  1043.423936 0.0000000
 Shapiro-Wilk Test  R    W               NA        NA
 Ljung-Box Test     R    Q(10)     8.752495 0.5557429
 Ljung-Box Test     R    Q(15)    11.756310 0.6973730
 Ljung-Box Test     R    Q(20)    16.536223 0.6828505
 Ljung-Box Test     R^2  Q(10)     3.720897 0.9590607
 Ljung-Box Test     R^2  Q(15)    10.652950 0.7767858
 Ljung-Box Test     R^2  Q(20)    15.884081 0.7237891
 LM Arch Test       R    TR^2      6.570920 0.8846192

Information Criterion Statistics:
      AIC       BIC       SIC      HQIC 
-5.882690 -5.875917 -5.882692 -5.880358 
Code
cat("\nSummary for GARCH(2,1):\n")

Summary for GARCH(2,1):
Code
garch_model <- garchFit(~ garch(2, 1), data = res_arima1, trace = FALSE)
summary(garch_model)

Title:
 GARCH Modelling 

Call:
 garchFit(formula = ~garch(2, 1), data = res_arima1, trace = FALSE) 

Mean and Variance Equation:
 data ~ garch(2, 1)
<environment: 0x113622b28>
 [data = res_arima1]

Conditional Distribution:
 norm 

Coefficient(s):
        mu       omega      alpha1      alpha2       beta1  
1.0481e-05  1.3043e-06  8.6268e-02  1.0000e-08  9.1121e-01  

Std. Errors:
 based on Hessian 

Error Analysis:
        Estimate  Std. Error  t value Pr(>|t|)    
mu     1.048e-05   1.229e-04    0.085    0.932    
omega  1.304e-06   2.508e-07    5.200 2.00e-07 ***
alpha1 8.627e-02   1.294e-02    6.668 2.59e-11 ***
alpha2 1.000e-08   1.433e-02    0.000    1.000    
beta1  9.112e-01   7.507e-03  121.388  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log Likelihood:
 20877.07    normalized:  2.941675 

Description:
 Thu Nov 20 15:29:25 2025 by user:  


Standardised Residuals Tests:
                                  Statistic   p-Value
 Jarque-Bera Test   R    Chi^2  1076.235300 0.0000000
 Shapiro-Wilk Test  R    W               NA        NA
 Ljung-Box Test     R    Q(10)     8.607071 0.5697548
 Ljung-Box Test     R    Q(15)    11.554181 0.7124122
 Ljung-Box Test     R    Q(20)    16.525822 0.6835144
 Ljung-Box Test     R^2  Q(10)     6.465670 0.7747412
 Ljung-Box Test     R^2  Q(15)    13.915196 0.5319704
 Ljung-Box Test     R^2  Q(20)    19.547251 0.4865520
 LM Arch Test       R    TR^2      9.209799 0.6849141

Information Criterion Statistics:
      AIC       BIC       SIC      HQIC 
-5.881941 -5.877103 -5.881942 -5.880275 
Code
cat("\nSummary for GARCH(1,1):\n")

Summary for GARCH(1,1):
Code
garch_model <- garchFit(~ garch(1,1), data = res_arima1, trace = FALSE)
summary(garch_model)

Title:
 GARCH Modelling 

Call:
 garchFit(formula = ~garch(1, 1), data = res_arima1, trace = FALSE) 

Mean and Variance Equation:
 data ~ garch(1, 1)
<environment: 0x132245e00>
 [data = res_arima1]

Conditional Distribution:
 norm 

Coefficient(s):
        mu       omega      alpha1       beta1  
1.0481e-05  1.3026e-06  8.6232e-02  9.1126e-01  

Std. Errors:
 based on Hessian 

Error Analysis:
        Estimate  Std. Error  t value Pr(>|t|)    
mu     1.048e-05   1.229e-04    0.085    0.932    
omega  1.303e-06   2.438e-07    5.342 9.18e-08 ***
alpha1 8.623e-02   6.930e-03   12.443  < 2e-16 ***
beta1  9.113e-01   6.704e-03  135.931  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log Likelihood:
 20876.89    normalized:  2.94165 

Description:
 Thu Nov 20 15:29:26 2025 by user:  


Standardised Residuals Tests:
                                  Statistic   p-Value
 Jarque-Bera Test   R    Chi^2  1076.066647 0.0000000
 Shapiro-Wilk Test  R    W               NA        NA
 Ljung-Box Test     R    Q(10)     8.615560 0.5689348
 Ljung-Box Test     R    Q(15)    11.566634 0.7114908
 Ljung-Box Test     R    Q(20)    16.535227 0.6829141
 Ljung-Box Test     R^2  Q(10)     6.474265 0.7739693
 Ljung-Box Test     R^2  Q(15)    13.920620 0.5315579
 Ljung-Box Test     R^2  Q(20)    19.554856 0.4860667
 LM Arch Test       R    TR^2      9.215639 0.6844100

Information Criterion Statistics:
      AIC       BIC       SIC      HQIC 
-5.882172 -5.878302 -5.882173 -5.880839 

By manually searching over possible p,qp, qp,q values, the GARCH(2,3) model was found to have the lowest AIC. However, when fitting this model, beta1 and beta2​ were not statistically significant, with large p-values, suggesting that trying models with slightly higher AIC values could be appropriate. After testing two additional models, all coefficients in the GARCH(1,1) model were found to be significant, and its AIC was very close to the minimum observed. Therefore, the GARCH(1,1) model is selected for further analysis.

Code
arima_fit1 <- Arima(log(kospi_ts), order = c(0,1,1), include.drift = TRUE)
arima_res1<-residuals(arima_fit1)
final.fit1<- garchFit(~ garch(1,1), data = arima_res1, trace = FALSE)
summary(final.fit1)

Title:
 GARCH Modelling 

Call:
 garchFit(formula = ~garch(1, 1), data = arima_res1, trace = FALSE) 

Mean and Variance Equation:
 data ~ garch(1, 1)
<environment: 0x1220f8908>
 [data = arima_res1]

Conditional Distribution:
 norm 

Coefficient(s):
        mu       omega      alpha1       beta1  
1.0481e-05  1.3026e-06  8.6232e-02  9.1126e-01  

Std. Errors:
 based on Hessian 

Error Analysis:
        Estimate  Std. Error  t value Pr(>|t|)    
mu     1.048e-05   1.229e-04    0.085    0.932    
omega  1.303e-06   2.438e-07    5.342 9.18e-08 ***
alpha1 8.623e-02   6.930e-03   12.443  < 2e-16 ***
beta1  9.113e-01   6.704e-03  135.931  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log Likelihood:
 20876.89    normalized:  2.94165 

Description:
 Thu Nov 20 15:29:26 2025 by user:  


Standardised Residuals Tests:
                                  Statistic   p-Value
 Jarque-Bera Test   R    Chi^2  1076.066647 0.0000000
 Shapiro-Wilk Test  R    W               NA        NA
 Ljung-Box Test     R    Q(10)     8.615560 0.5689348
 Ljung-Box Test     R    Q(15)    11.566634 0.7114908
 Ljung-Box Test     R    Q(20)    16.535227 0.6829141
 Ljung-Box Test     R^2  Q(10)     6.474265 0.7739693
 Ljung-Box Test     R^2  Q(15)    13.920620 0.5315579
 Ljung-Box Test     R^2  Q(20)    19.554856 0.4860667
 LM Arch Test       R    TR^2      9.215639 0.6844100

Information Criterion Statistics:
      AIC       BIC       SIC      HQIC 
-5.882172 -5.878302 -5.882173 -5.880839 
Code
garch_res1 <- residuals(final.fit1)
checkresiduals(garch_res1)


    Ljung-Box test

data:  Residuals
Q* = 24.189, df = 10, p-value = 0.007114

Model df: 0.   Total lags used: 10
Code
fc <-predict(final.fit1, n.ahead = 100)
invisible(predict(final.fit1, n.ahead = 100, plot = TRUE))

For the final model, an ARIMA(0,1,1) + GARCH(1,1) specification was used. The model diagnostics provide some insight into its performance. The residual plot shows a reasonably good pattern, with no obvious systematic structure and only a few noticeable spikes. The ACF plot of the residuals indicates that most lags fall within the significance bounds, suggesting that autocorrelation has been largely resolved.

However, the Ljung–Box test yields a very small p-value (0.007), indicating that the model fit is not fully satisfactory and that some dependence may remain in the residuals. Even so, this model is preferred because all of its coefficients are statistically significant, unlike the alternative specifications considered. Therefore, this is taken as the best model among the candidates.

Equation:

\[ \phi(B) x_t = \delta + \theta(B) y_t, \]

where

\[ \phi(B) = 1 - B, \qquad \theta(B) = 1 + \theta_1 B, \]

\[ y_t = \sigma_t \varepsilon_t, \qquad \varepsilon_t \sim \text{i.i.d. } N(0,1), \]

\[ \operatorname{var}(y_t \mid y_{t-1}) = \sigma_t^2 = \alpha_0 + \alpha_1 y_{t-1}^2 + \beta_1 \sigma_{t-1}^2. \]

\[ \Delta x_t = 0.0002 + y_t + 0.0571\,y_{t-1}, \]

\[ \sigma_t^2 = 1.3026\times 10^{-6} + 0.086232\,y_{t-1}^2 + 0.91126\,\sigma_{t-1}^2. \]

Modeling Approach 2: GARCH model

In the previous modeling, the overall model fit was not satisfactory. Therefore, in this section, a different modeling approach is used: directly applying models from the ARCH/GARCH family.

Code
kospi_acf_log <- ggAcf(kospi_return,lag.max = 30)+ggtitle("ACF Plot of KOSPI Index") + theme_bw() +
  geom_segment(lineend = "butt", color = "orange") +
    geom_hline(yintercept = 0, color = "orange") 
kospi_pacf_log <- ggPacf(kospi_return,lag.max = 30)+ggtitle("PACF Plot of KOSPI Index") + theme_bw()+
  geom_segment(lineend = "butt", color = "skyblue") +
    geom_hline(yintercept = 0, color = "skyblue") 

kospi_acf_log/kospi_pacf_log

Based on the ACF and PACF plots, the parameter search range for the manual selection process will be set from 0 to 5 for both p and q.

Code
# Initialize model storage
models <- list()
results <- data.frame(p = integer(), q = integer(), AIC = numeric())
cc <- 1

# Grid search for GARCH(p, q), where p = ARCH order, q = GARCH order
for (p in 1:5) {
  for (q in 1:5) {
    fit <- tryCatch(
      garch(kospi_return, order = c(q, p), trace = FALSE),
      error = function(e) NULL
    )
    if (!is.null(fit)) {
      models[[cc]] <- fit
      results <- rbind(results, data.frame(p = p, q = q, AIC = AIC(fit)))
      cc <- cc + 1
    }
  }
}

# Round AIC values for readability
results$AIC <- round(results$AIC, 3)

# Identify the model with the lowest AIC
highlight_row <- which.min(results$AIC)

# Display formatted table
results %>%
  kable(
    caption = "AIC Comparison for GARCH(p, q) Models",
    col.names = c("ARCH Order (p)", "GARCH Order (q)", "AIC"),
    align = c("c", "c", "c"),
    booktabs = TRUE
  ) %>%
  kable_styling(
    full_width = FALSE,
    position = "center",
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  row_spec(highlight_row, bold = TRUE, background = "#FFF59D")
AIC Comparison for GARCH(p, q) Models
ARCH Order (p) GARCH Order (q) AIC
1 1 -41720.63
1 2 -41712.15
1 3 -41700.60
1 4 -41694.91
1 5 -41657.04
2 1 -41711.77
2 2 -41710.51
2 3 -40956.99
2 4 -41716.38
2 5 -41692.39
3 1 -41601.49
3 2 -41713.82
3 3 -41673.10
3 4 -41686.95
3 5 -41676.55
4 1 -41481.96
4 2 -41771.15
4 3 -41592.23
4 4 -41687.38
4 5 -41701.31
5 1 -41494.61
5 2 -41541.75
5 3 -41679.61
5 4 -41680.56
5 5 -41694.14

Optimal model based on AIC is GARCH(4,2) model.

Code
garch_model1 <- garchFit(~ garch(4,2), data = kospi_return, trace = FALSE)
summary(garch_model1)

Title:
 GARCH Modelling 

Call:
 garchFit(formula = ~garch(4, 2), data = kospi_return, trace = FALSE) 

Mean and Variance Equation:
 data ~ garch(4, 2)
<environment: 0x10da9daa0>
 [data = kospi_return]

Conditional Distribution:
 norm 

Coefficient(s):
        mu       omega      alpha1      alpha2      alpha3      alpha4  
4.3079e-04  2.4548e-06  8.3322e-02  7.5768e-02  1.0000e-08  1.0000e-08  
     beta1       beta2  
9.7797e-02  7.3842e-01  

Std. Errors:
 based on Hessian 

Error Analysis:
        Estimate  Std. Error  t value Pr(>|t|)    
mu     4.308e-04   1.236e-04    3.485 0.000491 ***
omega  2.455e-06   1.172e-06    2.095 0.036147 *  
alpha1 8.332e-02   1.731e-02    4.813 1.49e-06 ***
alpha2 7.577e-02   6.397e-02    1.184 0.236231    
alpha3 1.000e-08   3.072e-02    0.000 1.000000    
alpha4 1.000e-08   1.848e-02    0.000 1.000000    
beta1  9.780e-02   7.378e-01    0.133 0.894547    
beta2  7.384e-01   6.668e-01    1.107 0.268102    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log Likelihood:
 20871.86    normalized:  2.941355 

Description:
 Thu Nov 20 15:29:27 2025 by user:  


Standardised Residuals Tests:
                                  Statistic    p-Value
 Jarque-Bera Test   R    Chi^2  1164.706555 0.00000000
 Shapiro-Wilk Test  R    W               NA         NA
 Ljung-Box Test     R    Q(10)    26.923355 0.00267804
 Ljung-Box Test     R    Q(15)    30.128349 0.01146701
 Ljung-Box Test     R    Q(20)    35.002552 0.02009069
 Ljung-Box Test     R^2  Q(10)     5.823711 0.82985033
 Ljung-Box Test     R^2  Q(15)    13.124526 0.59268010
 Ljung-Box Test     R^2  Q(20)    18.795894 0.53512652
 LM Arch Test       R    TR^2      8.463032 0.74798062

Information Criterion Statistics:
      AIC       BIC       SIC      HQIC 
-5.880455 -5.872713 -5.880458 -5.877789 
Code
garch_model2 <- garchFit(~ garch(2,1), data = kospi_return, trace = FALSE)
summary(garch_model2)

Title:
 GARCH Modelling 

Call:
 garchFit(formula = ~garch(2, 1), data = kospi_return, trace = FALSE) 

Mean and Variance Equation:
 data ~ garch(2, 1)
<environment: 0x1333aa6f8>
 [data = kospi_return]

Conditional Distribution:
 norm 

Coefficient(s):
        mu       omega      alpha1      alpha2       beta1  
4.3437e-04  1.3374e-06  8.7170e-02  1.0000e-08  9.1030e-01  

Std. Errors:
 based on Hessian 

Error Analysis:
        Estimate  Std. Error  t value Pr(>|t|)    
mu     4.344e-04   1.234e-04    3.519 0.000433 ***
omega  1.337e-06   2.546e-07    5.253 1.49e-07 ***
alpha1 8.717e-02   1.302e-02    6.696 2.15e-11 ***
alpha2 1.000e-08   1.439e-02    0.000 0.999999    
beta1  9.103e-01   7.581e-03  120.083  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log Likelihood:
 20871.64    normalized:  2.941324 

Description:
 Thu Nov 20 15:29:27 2025 by user:  


Standardised Residuals Tests:
                                  Statistic     p-Value
 Jarque-Bera Test   R    Chi^2  1154.123087 0.000000000
 Shapiro-Wilk Test  R    W               NA          NA
 Ljung-Box Test     R    Q(10)    26.809723 0.002791025
 Ljung-Box Test     R    Q(15)    30.007624 0.011894034
 Ljung-Box Test     R    Q(20)    34.899755 0.020644719
 Ljung-Box Test     R^2  Q(10)     5.463024 0.858183043
 Ljung-Box Test     R^2  Q(15)    12.876270 0.611854288
 Ljung-Box Test     R^2  Q(20)    18.539767 0.551896747
 LM Arch Test       R    TR^2      8.216584 0.767984231

Information Criterion Statistics:
      AIC       BIC       SIC      HQIC 
-5.881239 -5.876400 -5.881240 -5.879573 
Code
garch_model3 <- garchFit(~ garch(1,1), data = kospi_return, trace = FALSE)
summary(garch_model3)

Title:
 GARCH Modelling 

Call:
 garchFit(formula = ~garch(1, 1), data = kospi_return, trace = FALSE) 

Mean and Variance Equation:
 data ~ garch(1, 1)
<environment: 0x125c03078>
 [data = kospi_return]

Conditional Distribution:
 norm 

Coefficient(s):
        mu       omega      alpha1       beta1  
4.3479e-04  1.3385e-06  8.7224e-02  9.1024e-01  

Std. Errors:
 based on Hessian 

Error Analysis:
        Estimate  Std. Error  t value Pr(>|t|)    
mu     4.348e-04   1.233e-04    3.525 0.000423 ***
omega  1.338e-06   2.475e-07    5.407  6.4e-08 ***
alpha1 8.722e-02   7.010e-03   12.442  < 2e-16 ***
beta1  9.102e-01   6.779e-03  134.266  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log Likelihood:
 20871.81    normalized:  2.941349 

Description:
 Thu Nov 20 15:29:27 2025 by user:  


Standardised Residuals Tests:
                                  Statistic     p-Value
 Jarque-Bera Test   R    Chi^2  1154.190795 0.000000000
 Shapiro-Wilk Test  R    W               NA          NA
 Ljung-Box Test     R    Q(10)    26.785630 0.002815565
 Ljung-Box Test     R    Q(15)    29.980295 0.011992750
 Ljung-Box Test     R    Q(20)    34.874716 0.020781772
 Ljung-Box Test     R^2  Q(10)     5.464082 0.858103131
 Ljung-Box Test     R^2  Q(15)    12.886787 0.611042151
 Ljung-Box Test     R^2  Q(20)    18.551672 0.551115659
 LM Arch Test       R    TR^2      8.215222 0.768093388

Information Criterion Statistics:
      AIC       BIC       SIC      HQIC 
-5.881571 -5.877700 -5.881572 -5.880238 

When testing the model with the lowest AIC, the overall fit was strong; however, not all coefficients were statistically significant, with some showing large p-values. Therefore, two additional models were considered: one with an AIC value close to the minimum and another specified as GARCH(1,1).Comparing the three models, all coefficients in the GARCH(1,1) model were significant, so this model is selected as the final choice.

Code
garch.fit2 <- garch(kospi_return, order = c(1,1), trace = FALSE) 
summary(garch.fit2)

Call:
garch(x = kospi_return, order = c(1, 1), trace = FALSE)

Model:
GARCH(1,1)

Residuals:
     Min       1Q   Median       3Q      Max 
-8.02105 -0.53565  0.05735  0.61738  4.35545 

Coefficient(s):
    Estimate  Std. Error  t value Pr(>|t|)    
a0 1.278e-06   1.779e-07    7.185 6.71e-13 ***
a1 8.535e-02   3.886e-03   21.964  < 2e-16 ***
b1 9.123e-01   3.835e-03  237.881  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Diagnostic Tests:
    Jarque Bera Test

data:  Residuals
X-squared = 1208.1, df = 2, p-value < 2.2e-16


    Box-Ljung test

data:  Squared.Residuals
X-squared = 0.84606, df = 1, p-value = 0.3577
Code
garch_res1 <- residuals(garch.fit2)
checkresiduals(garch_res1)


    Ljung-Box test

data:  Residuals
Q* = 533.52, df = 504, p-value = 0.1754

Model df: 0.   Total lags used: 504

From model diagnostics, this model shows similar result for ACF and residual plot compare to other model approach. However, for Ljung-Box test, it has much better p-value, indicating that this model has better model fit compare to other method.

Equation:

\[ r_t = \sigma_t \varepsilon_t, \qquad \varepsilon_t \sim \text{i.i.d. } N(0,1), \]

\[ \sigma_t^2 = \alpha_0 + \alpha_1 r_{t-1}^2 + \beta_1 \sigma_{t-1}^2. \]

\[ \alpha_0 = 1.278\times 10^{-6}, \quad \alpha_1 = 0.08535, \quad \beta_1 = 0.9123, \]

\[ \sigma_t^2 = 1.278\times 10^{-6} + 0.08535\, r_{t-1}^2 + 0.9123\, \sigma_{t-1}^2. \]

Forecasting plot(365 days) : KOSPI Index

Code
library(fGarch)
library(lubridate)
library(dplyr)
library(plotly)


gfit <- garchFit(~ garch(1,1),
                 data = as.numeric(kospi_return),
                 trace = FALSE)

h <- 365
pf <- predict(gfit, n.ahead = h)
rhat <- as.numeric(pf$meanForecast)


last_price <- tail(as.numeric(kospi_close), 1)
price_fc <- numeric(h)
price_fc[1] <- last_price * exp(rhat[1])
for (i in 2:h) {
  price_fc[i] <- price_fc[i-1] * exp(rhat[i])
}

last_date <- tail(kospi$Date, 1) 
future_dates <- seq(last_date + 1, by = "day", length.out = h)

hist_df <- data.frame(
  Date = kospi$Date,  
  Price = as.numeric(kospi_close)
)

fc_df <- data.frame(Date = future_dates, Price = price_fc)

recent_window <- 3650 # recent 10 years
hist_df_recent <- tail(hist_df, recent_window)

plot_ly() %>%
  add_lines(data = hist_df_recent, x = ~Date, y = ~Price, 
            name = "Historical", line = list(color="blue")) %>%
  add_lines(data = fc_df, x = ~Date, y = ~Price, 
            name = "GARCH Forecast", 
            line = list(color="red", dash="dash")) %>%
  layout(title = "KOSPI — GARCH(1,1) 365-Day Price Forecast",
         xaxis = list(title = "Date"),
         yaxis = list(title = "Index"),
         hovermode = "x unified")

Volatility plot : KOSPI Index

Code
library(fGarch)
set.seed(1)

garch.fit11 <- garchFit(~ garch(1,1), data = kospi_return, trace = FALSE)
ht <- garch.fit11@h.t


data2 <- data.frame(Date = kospi$Date[-1][1:length(ht)], Volatility = ht)



p <- ggplot(data2, aes(x = Date, y = Volatility)) +
  geom_line(color = 'orange') +
  labs(
    title = "Conditional Variance from GARCH(1,1) Model",
    x = "Date",
    y = "Conditional Variance"
  ) +
  theme_minimal()

ggplotly(p)

:::

Financial time series moodel(KRW/USD spot rate)

Code
p_us <- plot_ly(kr, x = ~Date, y = ~krw, type = "scatter", mode = "lines",
                 name = "USD per KRW") %>%
  layout(title = "USD per KRW",
         xaxis = list(title = "Date"),
         yaxis = list(title = "USD per KRW"))
p_us
Code
kr_ts <- ts(kr$krw,
          start = decimal_date(as.Date("1996-12-11")),
          frequency = 252)

#kr_return<- diff(kr_ts)
kr_return <- diff(kr_ts) / kr_ts[-length(kr_ts)]
autoplot(kr_return) +
  ggtitle("Daily Returns of KRW/USD exchange spot") +
  xlab("Time") +
  ylab("Log Return")

As discussed in the univariate and multivariate time series model sections, for exchange rate data,log transformation data didn’t perform well, so original data was applied. The first plot is a time series plot of the KRW/USD exchange spot. Plot shows when there is an economic crisis(ex.2009 financial crisis), KRW against USD increases significantly.

The second plot shows the returns of theKRW/USD exchange spot. In this plot, volatility clustering is clearly observed: for most periods, the values are centered near zero, but in certain periods the series exhibits high volatility before returning to calmer behavior. This suggests that the data are potentially appropriate for modeling with the ARCH/GARCH family of models.

ACF & PACF plot

Code
library(patchwork)
kr_acf <- ggAcf(kr_return,lag.max = 30)+ggtitle("ACF Plot of KRW/USD exchange spot") + theme_bw() +
  geom_segment(lineend = "butt", color = "orange") +
    geom_hline(yintercept = 0, color = "orange") 
kr_pacf <- ggPacf(kr_return,lag.max = 30)+ggtitle("PACF Plot of KRW/USD exchange spot") + theme_bw()+
  geom_segment(lineend = "butt", color = "skyblue") +
    geom_hline(yintercept = 0, color = "skyblue") 

kr_acf/kr_pacf

From the plots, KOSPI Index data performs well in terms of autocorrelation by applying log transformation, However there are still some lag points showing autocorrelation.

Code
ggAcf(abs(kr_return), lag.max = 30) +
  ggtitle("ACF plot of Absolute 
          KRW/USD exchange spot") +
  xlab("Lag") +
  ylab("Autocorrelation") +
  theme_minimal()

Code
ggAcf(kr_return^2, lag.max = 30) +
  ggtitle("ACF plot of Squared
          KRW/USD exchange spot ") +
  xlab("Lag") +
  ylab("Autocorrelation") +
  theme_minimal()

Code
# Load required package
library(FinTS)

# Perform ARCH LM Test on Bitcoin returns
arch_test_result <- ArchTest(kr_return, lags = 1, demean = TRUE)

# Print results
print(arch_test_result)

    ARCH LM-test; Null hypothesis: no ARCH effects

data:  kr_return
Chi-squared = 367.16, df = 1, p-value < 2.2e-16

p-vlaue is smaller than 0.05(default criterion), indicating rejecting null hypothesis, concluding strong evidence for the presence of ARCH(1) effects in the data.

Two plots for applying absolute value and squaring to KRW/USD exchange spot shows significant autocorrelation throughout multiple lags, compare to log return. This implies that the returns are not independently and identically distributed, and gives evidence for need to apply conditional variance models such as ARCH or GARCH to capture the underlying time-varying volatility structure.

Modeling 1: ARIMA +GARCH model

Code
library(patchwork)

kr_acf <- ggAcf(kr_ts,lag.max = 30)+ggtitle("ACF Plot of KRW/USD exchange spot ") + theme_bw() +
  geom_segment(lineend = "butt", color = "orange") +
    geom_hline(yintercept = 0, color = "orange") 
kr_pacf <- ggPacf(kr_ts,lag.max = 30)+ggtitle("PACF Plot of KRW/USD exchange spot") + theme_bw()+
  geom_segment(lineend = "butt", color = "skyblue") +
    geom_hline(yintercept = 0, color = "skyblue") 

kr_acf/kr_pacf

Code
kr_acf_log <- ggAcf(kr_return,lag.max = 30)+ggtitle("ACF Plot of KRW/USD exchange spot") + theme_bw() +
  geom_segment(lineend = "butt", color = "orange") +
    geom_hline(yintercept = 0, color = "orange") 
kr_pacf_log <- ggPacf(kr_return,lag.max = 30)+ggtitle("PACF Plot of KRW/USD exchange spot") + theme_bw()+
  geom_segment(lineend = "butt", color = "skyblue") +
    geom_hline(yintercept = 0, color = "skyblue") 

kr_acf_log/kr_pacf_log

ACF plot shows significant autocorrelation across lags. Also in the PACF plot, it has significant autocorrelation, then drops significantly, indicating almost no autocorrelation after lag1.these plots indicate the presence of dependencies between past and current price movements, series is stationary.

ACF and PACF plot for KRW/USD exchange spot return shows that most of the lags are within threshold, however there are lags that spike, for example like lag4. For manual search, a search range from 0 to 4 seems appropriate.

And both plots have similar patterns, indicating that conditional heteroskedasticity is present. So applying the ARCH/GARCH family model is appropriate.

Code
ARIMA.c <- function(p_min, p_max, q_min, q_max, data) {
  results <- matrix(NA, nrow = 0, ncol = 6)
  colnames(results) <- c("p", "d", "q", "AIC", "BIC", "AICc")

  for (p in p_min:p_max) {
    for (q in q_min:q_max) {
      for (d in 0:2) {
        if ((p + d + q) <= 12) {  # Complexity constraint
          fit <- Arima(data, order = c(p, d, q))
          metrics <- c(p, d, q, fit$aic, fit$bic, fit$aicc)
          results <- rbind(results, metrics)
        }
      }
    }
  }

  # Convert to data frame
  results_df <- as.data.frame(results)
  colnames(results_df) <- c("p", "d", "q", "AIC", "BIC", "AICc")
  return(results_df)
}

highlight_output = function(output, type="ARIMA") {
    highlight_row <- c(which.min(output$AIC), which.min(output$BIC), which.min(output$AICc))
    knitr::kable(output, align = 'c', caption = paste("Comparison of", type, "Models")) %>%
    kable_styling(full_width = FALSE, position = "center") %>%
    row_spec(highlight_row, bold = TRUE, background = "#FFFF99")  # Highlight row in yellow
}

output <- ARIMA.c(p_min = 0, p_max = 4, q_min = 0, q_max = 4, data = kr_ts)
highlight_output(output)
Comparison of ARIMA Models
p d q AIC BIC AICc
metrics 0 0 0 133380.89 133395.41 133380.89
metrics.1 0 1 0 77418.88 77426.14 77418.88
metrics.2 0 2 0 84343.75 84351.01 84343.75
metrics.3 0 0 1 119630.66 119652.45 119630.67
metrics.4 0 1 1 77409.36 77423.88 77409.36
metrics.5 0 2 1 77423.45 77437.97 77423.45
metrics.6 0 0 2 109201.85 109230.89 109201.85
metrics.7 0 1 2 77403.63 77425.41 77403.63
metrics.8 0 2 2 77413.89 77435.68 77413.89
metrics.9 0 0 3 101726.74 101763.05 101726.75
metrics.10 0 1 3 77377.29 77406.33 77377.29
metrics.11 0 2 3 77408.12 77437.16 77408.12
metrics.12 0 0 4 96586.71 96630.27 96586.71
metrics.13 0 1 4 77271.46 77307.77 77271.47
metrics.14 0 2 4 77381.88 77418.18 77381.88
metrics.15 1 0 0 77425.75 77447.54 77425.75
metrics.16 1 1 0 77408.85 77423.37 77408.85
metrics.17 1 2 0 81411.00 81425.52 81411.00
metrics.18 1 0 1 77415.40 77444.45 77415.40
metrics.19 1 1 1 77410.01 77431.79 77410.01
metrics.20 1 2 1 77413.38 77435.16 77413.38
metrics.21 1 0 2 77408.76 77445.06 77408.76
metrics.22 1 1 2 77401.97 77431.02 77401.98
metrics.23 1 2 2 77414.60 77443.64 77414.60
metrics.24 1 0 3 77384.36 77427.93 77384.37
metrics.25 1 1 3 77286.71 77323.01 77286.71
metrics.26 1 2 3 77406.58 77442.88 77406.58
metrics.27 1 0 4 77281.61 77332.44 77281.62
metrics.28 1 1 4 77263.22 77306.79 77263.23
metrics.29 1 2 4 77291.73 77335.30 77291.74
metrics.30 2 0 0 77414.82 77443.86 77414.82
metrics.31 2 1 0 77406.69 77428.47 77406.69
metrics.32 2 2 0 80564.15 80585.94 80564.16
metrics.33 2 0 1 77393.24 77429.54 77393.24
metrics.34 2 1 1 77404.93 77433.97 77404.93
metrics.35 2 2 1 77411.19 77440.23 77411.19
metrics.36 2 0 2 77390.55 77434.12 77390.56
metrics.37 2 1 2 77354.57 77390.88 77354.58
metrics.38 2 2 2 77409.42 77445.73 77409.43
metrics.39 2 0 3 77373.41 77424.24 77373.42
metrics.40 2 1 3 77145.89 77189.46 77145.90
metrics.41 2 2 3 77359.08 77402.64 77359.08
metrics.42 2 0 4 77316.00 77374.09 77316.01
metrics.43 2 1 4 77229.32 77280.15 77229.33
metrics.44 2 2 4 77360.56 77411.39 77360.57
metrics.45 3 0 0 77412.09 77448.39 77412.09
metrics.46 3 1 0 77374.97 77404.02 77374.98
metrics.47 3 2 0 80166.71 80195.76 80166.72
metrics.48 3 0 1 77410.33 77453.90 77410.33
metrics.49 3 1 1 77269.28 77305.59 77269.29
metrics.50 3 2 1 77379.55 77415.86 77379.56
metrics.51 3 0 2 77365.36 77416.19 77365.37
metrics.52 3 1 2 77146.04 77189.61 77146.05
metrics.53 3 2 2 77274.70 77318.27 77274.71
metrics.54 3 0 3 77155.25 77213.34 77155.27
metrics.55 3 1 3 77148.73 77199.56 77148.74
metrics.56 3 2 3 77361.41 77412.24 77361.42
metrics.57 3 0 4 77240.38 77305.73 77240.39
metrics.58 3 1 4 77239.23 77297.32 77239.24
metrics.59 3 2 4 77312.08 77370.17 77312.09
metrics.60 4 0 0 77381.87 77425.44 77381.88
metrics.61 4 1 0 77284.60 77320.90 77284.60
metrics.62 4 2 0 79621.35 79657.65 79621.35
metrics.63 4 0 1 77280.47 77331.30 77280.48
metrics.64 4 1 1 77244.45 77288.02 77244.46
metrics.65 4 2 1 77289.30 77332.86 77289.30
metrics.66 4 0 2 77310.88 77368.97 77310.89
metrics.67 4 1 2 77242.08 77292.91 77242.09
metrics.68 4 2 2 77249.97 77300.79 77249.98
metrics.69 4 0 3 77318.05 77383.41 77318.07
metrics.70 4 1 3 77216.05 77274.14 77216.06
metrics.71 4 2 3 77247.27 77305.36 77247.28
metrics.72 4 0 4 77249.95 77322.57 77249.97
metrics.73 4 1 4 77054.02 77119.38 77054.04
metrics.74 4 2 4 77220.91 77286.26 77220.93

From the table, ARIMA(4,1,4) has lowest BIC,AIC and AICc

Code
auto_model2<-auto.arima(kr_ts)
auto_model2
Series: kr_ts 
ARIMA(0,1,0)(0,0,1)[252] with drift 

Coefficients:
        sma1   drift
      0.0223  0.0536
s.e.  0.0098  0.0954

sigma^2 = 91.72:  log likelihood = -38705.69
AIC=77417.39   AICc=77417.39   BIC=77439.17
Code
kr_data<- kr_ts
kr_output1 <- capture.output(sarima(kr_data, 4, 1, 4))

Code
start_line <- grep("Coefficients", kr_output1) 
end_line <- length(kr_output1)  
cat(kr_output1[start_line:end_line], sep = "\n")
Coefficients: 
         Estimate     SE  t.value p.value
ar1        0.2988 0.0170  17.5642   0.000
ar2        0.2035 0.0146  13.9744   0.000
ar3        0.3325 0.0149  22.3085   0.000
ar4       -0.8808 0.0178 -49.5244   0.000
ma1       -0.2817 0.0214 -13.1736   0.000
ma2       -0.1839 0.0178 -10.3540   0.000
ma3       -0.3645 0.0185 -19.7019   0.000
ma4        0.7989 0.0230  34.7057   0.000
constant   0.0540 0.0849   0.6357   0.525

sigma^2 estimated as 88.48015 on 10514 degrees of freedom 
 
AIC = 7.322591  AICc = 7.322593  BIC = 7.329491 
 

From model diagnostic for three potential models, diagnostics are not satisfactory, to evaluate which model to choose, model AIC,BIC and coefficient’s significance. In this sense, i would use ARIMA(4,1,4) model since it has lowest BIC value. Also auto.arima() suggested ARIMA(0,1,0)(0,0,1)[252], however as seen in EDA and univariate section, using ARIMA(4,1,4) is more appropriate.

Residual fit

Code
arima_fit2 <- Arima(kr_ts, order = c(4,1,4), include.drift = TRUE)
res_arima2<- residuals(arima_fit2)

kr_res_acf <- ggAcf(res_arima2,lag.max = 30)+ggtitle("ACF Plot of ARIMA(0,1,1) Residuals") 

kr_res_pacf <- ggPacf(res_arima2,lag.max = 30)+ggtitle("PACF Plot of ARIMA(0,1,1) Residuals") 

kr_res_acf/kr_res_pacf

Code
kr_res_acf <- ggAcf(res_arima2^2,lag.max = 30)+ggtitle("ACF Plot of ARIMA(0,1,1) Residuals") 

kr_res_pacf <- ggPacf(res_arima2^2,lag.max = 30)+ggtitle("PACF Plot of ARIMA(0,1,1) Residuals") 

kr_res_acf/kr_res_pacf

From the ACF and PACF plots of the residuals and squared residuals for the ARIMA(4,1,4) model, the ACF of the squared residuals shows noticeable spikes up to lag 4, followed by a gradual decline. This suggests the presence of an ARCH effect, indicating time-varying volatility in the data. Based on these plots, an appropriate range for manual model search is p,q both in 0:4 range.

Code
library(tseries)
library(knitr)
library(kableExtra)

# Initialize list to store models and track p, q
models <- list()
pq_combinations <- data.frame(p = integer(), q = integer(), AIC = numeric())
cc <- 1

for (p in 0:4) {
  for (q in 0:4) {
    fit <- tryCatch(
      garch(res_arima2, order = c(q, p), trace = FALSE),
      error = function(e) NULL
    )
    if (!is.null(fit)) {
      models[[cc]] <- fit
      pq_combinations <- rbind(
        pq_combinations,
        data.frame(p = p, q = q, AIC = AIC(fit))
      )
      cc <- cc + 1
    }
  }
}

# Round AIC for presentation
pq_combinations$AIC <- round(pq_combinations$AIC, 3)

# Identify row with minimum AIC
highlight_row <- which.min(pq_combinations$AIC)

# Create formatted table
pq_combinations %>%
  kable(
    caption = "AIC Comparison for GARCH(p, q) Models",
    col.names = c("ARCH Order (p)", "GARCH Order (q)", "AIC"),
    align = c("c", "c", "c"),
    booktabs = TRUE
  ) %>%
  kable_styling(
    full_width = FALSE,
    position = "center",
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  row_spec(highlight_row, bold = TRUE, background = "#FFF59D")
AIC Comparison for GARCH(p, q) Models
ARCH Order (p) GARCH Order (q) AIC
0 1 77039.26
0 2 77034.41
0 3 77030.07
0 4 77025.75
1 0 70886.82
1 1 64300.52
1 2 72349.90
1 3 72043.93
1 4 71748.54
2 0 69232.85
2 1 64279.30
2 2 71642.42
2 3 71336.80
2 4 70970.01
3 0 67892.56
3 1 71271.64
3 2 70976.80
3 3 70627.55
3 4 70299.92
4 0 66876.25
4 1 70645.10
4 2 70323.57
4 3 69988.46
4 4 69621.45

From manual search, GARCH(2,1) model is better has lowest AIC

Code
cat("\nSummary for GARCH(2,1):\n")

Summary for GARCH(2,1):
Code
garch_model <- garchFit(~ garch(2, 1), data = res_arima2, trace = FALSE)
summary(garch_model)

Title:
 GARCH Modelling 

Call:
 garchFit(formula = ~garch(2, 1), data = res_arima2, trace = FALSE) 

Mean and Variance Equation:
 data ~ garch(2, 1)
<environment: 0x116756070>
 [data = res_arima2]

Conditional Distribution:
 norm 

Coefficient(s):
         mu        omega       alpha1       alpha2        beta1  
-0.00360202   0.11451836   0.06124867   0.00000001   0.94049761  

Std. Errors:
 based on Hessian 

Error Analysis:
         Estimate  Std. Error  t value Pr(>|t|)    
mu     -3.602e-03   3.977e-02   -0.091    0.928    
omega   1.145e-01   6.631e-03   17.271   <2e-16 ***
alpha1  6.125e-02   7.269e-03    8.426   <2e-16 ***
alpha2  1.000e-08   7.986e-03    0.000    1.000    
beta1   9.405e-01   2.357e-03  398.977   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log Likelihood:
 -32172.01    normalized:  -3.057014 

Description:
 Thu Nov 20 15:31:15 2025 by user:  


Standardised Residuals Tests:
                                  Statistic      p-Value
 Jarque-Bera Test   R    Chi^2  16959.12128 0.000000e+00
 Shapiro-Wilk Test  R    W               NA           NA
 Ljung-Box Test     R    Q(10)    130.21986 0.000000e+00
 Ljung-Box Test     R    Q(15)    138.42008 0.000000e+00
 Ljung-Box Test     R    Q(20)    151.96975 0.000000e+00
 Ljung-Box Test     R^2  Q(10)     53.38745 6.308545e-08
 Ljung-Box Test     R^2  Q(15)     85.45499 6.931011e-12
 Ljung-Box Test     R^2  Q(20)    101.05036 8.166801e-13
 LM Arch Test       R    TR^2      59.11074 3.276980e-08

Information Criterion Statistics:
     AIC      BIC      SIC     HQIC 
6.114977 6.118427 6.114977 6.116142 
Code
cat("\nSummary for GARCH(1,1):\n")

Summary for GARCH(1,1):
Code
garch_model <- garchFit(~ garch(1,1), data = res_arima2, trace = FALSE)
summary(garch_model)

Title:
 GARCH Modelling 

Call:
 garchFit(formula = ~garch(1, 1), data = res_arima2, trace = FALSE) 

Mean and Variance Equation:
 data ~ garch(1, 1)
<environment: 0x143edfb50>
 [data = res_arima2]

Conditional Distribution:
 norm 

Coefficient(s):
       mu      omega     alpha1      beta1  
-0.003602   0.114441   0.061231   0.940516  

Std. Errors:
 based on Hessian 

Error Analysis:
        Estimate  Std. Error  t value Pr(>|t|)    
mu     -0.003602    0.039752   -0.091    0.928    
omega   0.114441    0.006616   17.298   <2e-16 ***
alpha1  0.061231    0.002842   21.543   <2e-16 ***
beta1   0.940516    0.002166  434.227   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log Likelihood:
 -32171.35    normalized:  -3.056951 

Description:
 Thu Nov 20 15:31:15 2025 by user:  


Standardised Residuals Tests:
                                  Statistic      p-Value
 Jarque-Bera Test   R    Chi^2  16956.04373 0.000000e+00
 Shapiro-Wilk Test  R    W               NA           NA
 Ljung-Box Test     R    Q(10)    130.23088 0.000000e+00
 Ljung-Box Test     R    Q(15)    138.41555 0.000000e+00
 Ljung-Box Test     R    Q(20)    151.97084 0.000000e+00
 Ljung-Box Test     R^2  Q(10)     53.39726 6.282115e-08
 Ljung-Box Test     R^2  Q(15)     85.44565 6.958656e-12
 Ljung-Box Test     R^2  Q(20)    101.05454 8.152368e-13
 LM Arch Test       R    TR^2      59.12034 3.263836e-08

Information Criterion Statistics:
     AIC      BIC      SIC     HQIC 
6.114662 6.117422 6.114662 6.115594 

By manual search to find optimal p,q value, GARCH(2,1) had lowest AIC. However when fitting a model with this parameter, alpha2 are not significant with large p value, trying other close by AIC values could be appropriate. After testing more model, all the coefficient for GARCH(1,1) model are significant and very close AIC value to lowest value, So further modeling GARCH(1,1) will be used.

Code
arima_fit2 <- Arima(kr_ts, order = c(4,1,4), include.drift = TRUE)
arima_res2<-residuals(arima_fit2)
final.fit20<- garchFit(~ garch(1, 1), data = arima_res2, trace = FALSE)
summary(final.fit20)

Title:
 GARCH Modelling 

Call:
 garchFit(formula = ~garch(1, 1), data = arima_res2, trace = FALSE) 

Mean and Variance Equation:
 data ~ garch(1, 1)
<environment: 0x10f56b7b0>
 [data = arima_res2]

Conditional Distribution:
 norm 

Coefficient(s):
       mu      omega     alpha1      beta1  
-0.003602   0.114441   0.061231   0.940516  

Std. Errors:
 based on Hessian 

Error Analysis:
        Estimate  Std. Error  t value Pr(>|t|)    
mu     -0.003602    0.039752   -0.091    0.928    
omega   0.114441    0.006616   17.298   <2e-16 ***
alpha1  0.061231    0.002842   21.543   <2e-16 ***
beta1   0.940516    0.002166  434.227   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log Likelihood:
 -32171.35    normalized:  -3.056951 

Description:
 Thu Nov 20 15:31:16 2025 by user:  


Standardised Residuals Tests:
                                  Statistic      p-Value
 Jarque-Bera Test   R    Chi^2  16956.04373 0.000000e+00
 Shapiro-Wilk Test  R    W               NA           NA
 Ljung-Box Test     R    Q(10)    130.23088 0.000000e+00
 Ljung-Box Test     R    Q(15)    138.41555 0.000000e+00
 Ljung-Box Test     R    Q(20)    151.97084 0.000000e+00
 Ljung-Box Test     R^2  Q(10)     53.39726 6.282115e-08
 Ljung-Box Test     R^2  Q(15)     85.44565 6.958656e-12
 Ljung-Box Test     R^2  Q(20)    101.05454 8.152368e-13
 LM Arch Test       R    TR^2      59.12034 3.263836e-08

Information Criterion Statistics:
     AIC      BIC      SIC     HQIC 
6.114662 6.117422 6.114662 6.115594 
Code
garch_res30 <- residuals(final.fit20)
checkresiduals(garch_res30)


    Ljung-Box test

data:  Residuals
Q* = 58.374, df = 10, p-value = 7.351e-09

Model df: 0.   Total lags used: 10

For final model, ARIMA(4,1,4)+GARCH(1,1) model was used. Model diagnostic give some intuition for this model. First residual plot shows it is good patterns, similar patterns with spikes. ACF plot also shows most lags fall into threshold, autocorrelation is resolved. however , for Ljung-Box test, it has very small p-value, indicating model fit is not satisfactory. However this model give all the coefficients as significant compare to other model choice, so this is the best model.

Code
fc <-predict(final.fit20, n.ahead = 100)
invisible(predict(final.fit20, n.ahead = 100, plot = TRUE))

Equation:

\[ (1 - \phi_1 B - \phi_2 B^2 - \phi_3 B^3 - \phi_4 B^4)(1 - B)x_t = \delta + (1 + \theta_1 B + \theta_2 B^2 + \theta_3 B^3 + \theta_4 B^4)y_t, \]

\[ \phi_1 = 0.2988,\quad \phi_2 = 0.2035,\quad \phi_3 = 0.3325,\quad \phi_4 = -0.8808, \] \[ \theta_1 = -0.2817,\quad \theta_2 = -0.1839,\quad \theta_3 = -0.3645,\quad \theta_4 = 0.7989,\quad \delta = 0.0540. \]

\[ \begin{aligned} \Delta x_t &= 0.0540 + 0.2988\,\Delta x_{t-1} + 0.2035\,\Delta x_{t-2} + 0.3325\,\Delta x_{t-3} \\ &\quad - 0.8808\,\Delta x_{t-4} + y_t - 0.2817\,y_{t-1} - 0.1839\,y_{t-2} \\ &\quad - 0.3645\,y_{t-3} + 0.7989\,y_{t-4}. \end{aligned} \]

\[ y_t = \sigma_t \varepsilon_t,\qquad \varepsilon_t \sim \text{i.i.d. } N(0,1), \]

\[ \sigma_t^2 = \omega + \alpha_1 y_{t-1}^2 + \beta_1 \sigma_{t-1}^2, \]

\[ \omega = 0.114441,\quad \alpha_1 = 0.061231,\quad \beta_1 = 0.940516, \]

\[ \sigma_t^2 = 0.114441 + 0.061231\,y_{t-1}^2 + 0.940516\,\sigma_{t-1}^2. \]

Modeling Approach 2: GARCH model

In the previous modeling, the overall model fit was not satisfactory. Therefore, in this section, a different modeling approach is used: directly applying models from the ARCH/GARCH family.

Code
kr_acf_log <- ggAcf(kr_return,lag.max = 30)+ggtitle("ACF Plot of KRW/USD exchange spot") + theme_bw() +
  geom_segment(lineend = "butt", color = "orange") +
    geom_hline(yintercept = 0, color = "orange") 
kr_pacf_log <- ggPacf(kr_return,lag.max = 30)+ggtitle("PACF Plot of KRW/USD exchange spot") + theme_bw()+
  geom_segment(lineend = "butt", color = "skyblue") +
    geom_hline(yintercept = 0, color = "skyblue") 

kr_acf_log/kr_pacf_log

Based on the ACF and PACF plots, the parameter search range for the manual selection process will be set from 0 to 5 for both p and q.

Code
# Initialize model storage
models <- list()
results <- data.frame(p = integer(), q = integer(), AIC = numeric())
cc <- 1

# Grid search for GARCH(p, q), where p = ARCH order, q = GARCH order
for (p in 1:5) {
  for (q in 1:5) {
    fit <- tryCatch(
      garch(kr_return, order = c(q, p), trace = FALSE),
      error = function(e) NULL
    )
    if (!is.null(fit)) {
      models[[cc]] <- fit
      results <- rbind(results, data.frame(p = p, q = q, AIC = AIC(fit)))
      cc <- cc + 1
    }
  }
}

# Round AIC values for readability
results$AIC <- round(results$AIC, 3)

# Identify the model with the lowest AIC
highlight_row <- which.min(results$AIC)

# Display formatted table
results %>%
  kable(
    caption = "AIC Comparison for GARCH(p, q) Models",
    col.names = c("ARCH Order (p)", "GARCH Order (q)", "AIC"),
    align = c("c", "c", "c"),
    booktabs = TRUE
  ) %>%
  kable_styling(
    full_width = FALSE,
    position = "center",
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  row_spec(highlight_row, bold = TRUE, background = "#FFF59D")
AIC Comparison for GARCH(p, q) Models
ARCH Order (p) GARCH Order (q) AIC
1 1 -84358.85
1 2 -84401.44
1 3 -84442.49
1 4 -84348.65
1 5 -84292.71
2 1 -83678.21
2 2 -83696.71
2 3 -84394.17
2 4 -84379.66
2 5 -84427.60
3 1 -83394.70
3 2 -83879.07
3 3 -84180.02
3 4 -84387.80
3 5 -84269.48
4 1 -82948.93
4 2 -83631.44
4 3 -84134.03
4 4 -84261.40
4 5 -84342.08
5 1 -82541.62
5 2 -84151.89
5 3 -84297.51
5 4 -84156.74
5 5 -84274.88
Code
garch_model2 <- garchFit(~ garch(1,3), data = kr_return, trace = FALSE)
summary(garch_model2)

Title:
 GARCH Modelling 

Call:
 garchFit(formula = ~garch(1, 3), data = kr_return, trace = FALSE) 

Mean and Variance Equation:
 data ~ garch(1, 3)
<environment: 0x12583e050>
 [data = kr_return]

Conditional Distribution:
 norm 

Coefficient(s):
         mu        omega       alpha1        beta1        beta2        beta3  
-3.4304e-05   3.2428e-07   1.2827e-01   2.6771e-02   3.9511e-01   4.4352e-01  

Std. Errors:
 based on Hessian 

Error Analysis:
         Estimate  Std. Error  t value Pr(>|t|)    
mu     -3.430e-05   3.581e-05   -0.958    0.338    
omega   3.243e-07   4.161e-08    7.793 6.44e-15 ***
alpha1  1.283e-01   6.864e-03   18.687  < 2e-16 ***
beta1   2.677e-02   3.703e-02    0.723    0.470    
beta2   3.951e-01   4.189e-02    9.431  < 2e-16 ***
beta3   4.435e-01   3.712e-02   11.947  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log Likelihood:
 42238.13    normalized:  4.013887 

Description:
 Thu Nov 20 15:31:17 2025 by user:  


Standardised Residuals Tests:
                                  Statistic      p-Value
 Jarque-Bera Test   R    Chi^2  17181.83321 0.000000e+00
 Shapiro-Wilk Test  R    W               NA           NA
 Ljung-Box Test     R    Q(10)     13.39087 2.026311e-01
 Ljung-Box Test     R    Q(15)     19.00019 2.137254e-01
 Ljung-Box Test     R    Q(20)     24.62524 2.161461e-01
 Ljung-Box Test     R^2  Q(10)     42.25240 6.761575e-06
 Ljung-Box Test     R^2  Q(15)     80.78765 5.011846e-11
 Ljung-Box Test     R^2  Q(20)     96.78413 4.718337e-12
 LM Arch Test       R    TR^2      49.25894 1.884473e-06

Information Criterion Statistics:
      AIC       BIC       SIC      HQIC 
-8.026633 -8.022493 -8.026634 -8.025235 
Code
garch_model3 <- garchFit(~ garch(1,1), data = kr_return, trace = FALSE)
summary(garch_model3)

Title:
 GARCH Modelling 

Call:
 garchFit(formula = ~garch(1, 1), data = kr_return, trace = FALSE) 

Mean and Variance Equation:
 data ~ garch(1, 1)
<environment: 0x141592cc0>
 [data = kr_return]

Conditional Distribution:
 norm 

Coefficient(s):
         mu        omega       alpha1        beta1  
-3.1171e-05   1.3193e-07   5.4492e-02   9.4310e-01  

Std. Errors:
 based on Hessian 

Error Analysis:
         Estimate  Std. Error  t value Pr(>|t|)    
mu     -3.117e-05   3.612e-05   -0.863    0.388    
omega   1.319e-07   1.499e-08    8.803   <2e-16 ***
alpha1  5.449e-02   2.668e-03   20.425   <2e-16 ***
beta1   9.431e-01   2.356e-03  400.327   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Log Likelihood:
 42186.13    normalized:  4.008945 

Description:
 Thu Nov 20 15:31:17 2025 by user:  


Standardised Residuals Tests:
                                  Statistic      p-Value
 Jarque-Bera Test   R    Chi^2  15425.25000 0.000000e+00
 Shapiro-Wilk Test  R    W               NA           NA
 Ljung-Box Test     R    Q(10)     13.91883 1.767264e-01
 Ljung-Box Test     R    Q(15)     19.60159 1.877548e-01
 Ljung-Box Test     R    Q(20)     24.83351 2.078750e-01
 Ljung-Box Test     R^2  Q(10)     59.56276 4.384453e-09
 Ljung-Box Test     R^2  Q(15)     98.10012 2.986500e-14
 Ljung-Box Test     R^2  Q(20)    116.40080 1.332268e-15
 LM Arch Test       R    TR^2      64.93725 2.800070e-09

Information Criterion Statistics:
      AIC       BIC       SIC      HQIC 
-8.017129 -8.014369 -8.017130 -8.016197 

The optimal model based on AIC is the GARCH(1,3) model. When testing the model with the lowest AIC, the overall fit was strong; however, not all coefficients were statistically significant, with some showing large p-values. Therefore, additional model were considered: GARCH(1,1).Comparing the two models, all coefficients in the GARCH(1,1) model were significant, so this model is selected as the final choice.

Code
garch.fit2 <- garch(kr_return, order = c(1,1), trace = FALSE) 
summary(garch.fit2)

Call:
garch(x = kr_return, order = c(1, 1), trace = FALSE)

Model:
GARCH(1,1)

Residuals:
    Min      1Q  Median      3Q     Max 
-7.7036 -0.3260  0.0000  0.2961  9.7624 

Coefficient(s):
    Estimate  Std. Error  t value Pr(>|t|)    
a0 1.317e-07   7.491e-09    17.58   <2e-16 ***
a1 5.447e-02   1.169e-03    46.60   <2e-16 ***
b1 9.431e-01   1.172e-03   804.55   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Diagnostic Tests:
    Jarque Bera Test

data:  Residuals
X-squared = 15467, df = 2, p-value < 2.2e-16


    Box-Ljung test

data:  Squared.Residuals
X-squared = 14.196, df = 1, p-value = 0.0001647
Code
garch_res2 <- residuals(garch.fit2)
checkresiduals(garch_res2)


    Ljung-Box test

data:  Residuals
Q* = 563.01, df = 504, p-value = 0.03503

Model df: 0.   Total lags used: 504

From model diagnostics, this model shows similar result for ACF and residual plot compare to other model approach. However, for Ljung-Box test, it has much better p-value, indicating that this model has better model fit compare to other method.

Equation:

\[ r_t = \sigma_t \varepsilon_t, \qquad \varepsilon_t \sim \text{i.i.d. } N(0,1), \]

\[ \sigma_t^2 = \alpha_0 + \alpha_1 r_{t-1}^2 + \beta_1 \sigma_{t-1}^2. \]

\[ \alpha_0 = 1.317\times 10^{-7}, \quad \alpha_1 = 0.05447, \quad \beta_1 = 0.9431, \]

\[ \sigma_t^2 = 1.317\times 10^{-7} + 0.05447\, r_{t-1}^2 + 0.9431\, \sigma_{t-1}^2. \]

Forecasting plot(365days): KRW/USD exchange spot

Code
library(fGarch)
library(lubridate)
library(dplyr)
library(plotly)


gfit <- garchFit(~ garch(1,1),
                 data = as.numeric(kr_return),
                 trace = FALSE)

# Forecast 365 days
h <- 365
pf <- predict(gfit, n.ahead = h)
rhat <- as.numeric(pf$meanForecast)

# Convert differences to prices (simple addition)
last_price <- tail(as.numeric(kr$krw), 1)
price_fc <- numeric(h)
price_fc[1] <- last_price + rhat[1]
for (i in 2:h) {
  price_fc[i] <- price_fc[i-1] + rhat[i]
}

# Create future dates
last_date <- tail(kr$Date, 1) 
future_dates <- seq(last_date + 1, by = "day", length.out = h)

# Historical data
hist_df <- data.frame(
  Date = kr$Date,  
  Price = as.numeric(kr$krw)
)

# Forecast data
fc_df <- data.frame(Date = future_dates, Price = price_fc)

# Show recent 10 years
recent_window <- 3650
hist_df_recent <- tail(hist_df, recent_window)

# Plot
plot_ly() %>%
  add_lines(data = hist_df_recent, x = ~Date, y = ~Price, 
            name = "Historical", line = list(color="blue", width=2)) %>%
  add_lines(data = fc_df, x = ~Date, y = ~Price, 
            name = "GARCH Forecast", 
            line = list(color="red", dash="dash", width=3)) %>%
  layout(title = "KRW/USD Exchange Rate — GARCH(1,1) 365-Day Forecast",
         xaxis = list(title = "Date"),
         yaxis = list(title = "KRW per USD"),
         hovermode = "x unified")

Volatility plot : KRW/USD exchange spot

Code
library(fGarch)
set.seed(1)


garch.fit11 <- garchFit(~ garch(1,1), data = kr_return, trace = FALSE)
ht <- garch.fit11@h.t


data2 <- data.frame(Date = kr$Date[-1][1:length(ht)], Volatility = ht)



p <- ggplot(data2, aes(x = Date, y = Volatility)) +
  geom_line(color = 'orange') +
  labs(
    title = "Conditional Variance from GARCH(1,1) Model",
    x = "Date",
    y = "Conditional Variance"
  ) +
  theme_minimal()
ggplotly(p)