Univariate TS Models(ARIMA/SARIMA)

Author

zoo un park

note

In the EDA part, there is stationary observed from ACF and PACF plot except seoul housing index. Also ADF test(after differencing) confirms this with significant low p-value. To check further specifically seoul housing index, first part of this section is performing second order differencing.

package needed

Code
library(fpp3)  
library(feasts) 
library(tsibble)
library(tidyverse)
library(lubridate)
library(zoo)       
library(forecast)   
library(tseries)   
library(patchwork) 
library(here)
library(plotly)
library(xts)
library(dplyr)
library(ggplot2)
library(ggfortify)
library(knitr)
library(kableExtra)
library(astsa)
library(fpp2)
# calling data

#KR base rate
bok<- read.csv("../data/interest/bok.csv")%>%
  rename(Date = date)
bok$Date<- as.Date(bok$Date)
bok <- bok %>%
  mutate(Date = floor_date(Date, unit = "quarter")) %>%
  group_by(Date) %>%
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)), .groups = "drop")


# FED rate 

usr<- read.csv("../data/interest/us_rate.csv")%>%
  rename(Date = observation_date,rate = DFF)
usr$Date<- as.Date(usr$Date)
usr<- usr %>%
  mutate(Date = floor_date(Date, unit = "quarter")) %>%
  group_by(Date) %>%
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)), .groups = "drop")

# KRW/USD rate

kr <- read.csv("../data/fx rate/kor.csv")
kr <- kr %>%
  transmute(
    Date = as.Date(observation_date),
    krw  = DEXKOUS,    
    usd  = 1 / DEXKOUS  
  ) %>%
  filter(!is.na(Date)) %>%
  arrange(Date) %>%
  distinct(Date, .keep_all = TRUE) %>%
  complete(Date = seq(min(Date), max(Date), by = "day")) %>%

  fill(krw, usd, .direction = "down")


# USD index 

usd <- read_csv("../data/fx rate/usd_index.csv", show_col_types = FALSE)
usd <- usd %>%
  transmute(
    Date = as.Date(Date),
    usd_index = Close
  ) %>%
  filter(!is.na(Date)) %>%
  arrange(Date) %>%
  distinct(Date, .keep_all = TRUE) %>%
  complete(Date = seq(min(Date), max(Date), by = "day")) %>%
  fill(usd_index, .direction = "down")



# Seoul housing data

housing <- read_csv(here("data/housing/house.csv")) %>%
  mutate(Date = as.Date(Date))

housing <- housing %>% mutate(Date = as.Date(Date))


# Yield data 

us_yield  <- read.csv(here("data/yield", "us_yield.csv"))
kor_yield   <- read.csv(here("data/yield", "kor_yield.csv"))


us_yield$Date  <- as.Date(us_yield$Date)
kor_yield$Date <- as.Date(kor_yield$Date)

colnames(us_yield)[colnames(us_yield)  == "X3Y"]  <- "US_3Y"
colnames(us_yield)[colnames(us_yield)  == "X10Y"] <- "US_10Y"
colnames(kor_yield)[colnames(kor_yield) == "X3Y"]  <- "KR_3Y"
colnames(kor_yield)[colnames(kor_yield) == "X10Y"] <- "KR_10Y"

us_yield  <- us_yield[,  c("Date", "US_3Y", "US_10Y")]
kor_yield <- kor_yield[, c("Date", "KR_3Y", "KR_10Y")]

us_yield  <- us_yield  %>% arrange(Date) %>% fill(US_3Y, US_10Y, .direction = "down")
kor_yield <- kor_yield %>% arrange(Date) %>% fill(KR_3Y, KR_10Y, .direction = "down")


df <- merge(us_yield, kor_yield, by = "Date")

df$US_3m10  <- df$US_3Y - df$US_10Y
df$KR_3m10  <- df$KR_3Y - df$KR_10Y
df$US_KR_3Y <- df$US_3Y - df$KR_3Y
df$US_KR_10Y<- df$US_10Y - df$KR_10Y


#import & export data 

imports <- read_csv(here::here("data/trade", "imports.csv"))
exports <- read_csv(here::here("data/trade", "exports.csv"))

clean_date <- function(x) {
  as.Date(paste0(sub("^([0-9]{4})([0-9]{2})$", "\\1-\\2", gsub("/", "-", as.character(x))), "-01"))
}

imports <- imports %>%
  mutate(Date = clean_date(Date),
         Type = "Imports")

exports <- exports %>%
  mutate(Date = clean_date(Date),
         Type = "Exports")
trade <- bind_rows(imports, exports)

names(trade)[2] <- "Value"

trade_ts <- trade %>%
  as_tsibble(index = Date, key = Type)


# S&P500 data
sp500 <- read_csv("../data/stock/sp500.csv")


# KOSPI data
kospi <- read_csv("../data/stock/kospi.csv")
kospi$Date <- as.Date(kospi$Date)

Differencing

Code
bok_ts <- ts(bok$base_rate, start = c(year(min(bok$Date)), month(min(bok$Date))), end = c(year(max(bok$Date)), month(max(bok$Date))), frequency=4)
bok_diff<- diff(bok_ts,differences = 2)
ggAcf(bok_diff) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of South Korea base rate") +
  theme_bw()

Code
ggPacf(bok_diff) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of South Korea base rate") +
  theme_bw()

Code
usr_ts <- ts(usr$rate, start = c(year(min(usr$Date)), month(min(usr$Date))), end = c(year(max(usr$Date)), month(max(usr$Date))),frequency= 4)
usr_diff<- diff(usr_ts,differences = 2)

ggAcf(usr_diff) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of Federal Reserve Rate ") +
  theme_bw()

Code
ggPacf(usr_diff) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of Federal Reserve Rate ") +
  theme_bw()

Code
kr_ts<- ts(kr$usd, frequency = 252, start = c(year(min(kr$Date)), month(min(kr$Date))), end = c(year(max(kr$Date)), month(max(kr$Date))))
kr_diff<- diff(kr_ts,differences = 2)

ggAcf(kr_diff,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of KRW/USD FX rate") +
  theme_bw()

Code
ggPacf(kr_diff,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of  KRW/USD FX rate") +
  theme_bw()

Code
import_ts<- ts(imports$USD, frequency = 12, start = c(year(min(imports$Date)), month(min(imports$Date))), end = c(year(max(imports$Date)), month(max(imports$Date))))
import_diff <- diff(import_ts,differences = 2)

ggAcf(import_diff) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of trade import South Korea vs USA") +
  theme_bw()

Code
ggPacf(import_diff) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of trade import South Korea vs USA") +
  theme_bw()

Code
export_ts<- ts(exports$USD, frequency = 12, start = c(year(min(exports$Date)), month(min(exports$Date))), end = c(year(max(exports$Date)), month(max(exports$Date))))
export_diff <- diff(export_ts,differences = 2)

ggAcf(export_diff) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of trade export South Korea vs USA") +
  theme_bw()

Code
ggPacf(export_diff) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of trade export South Korea vs USA") +
  theme_bw()

Code
yield_3y_ts<- ts(df$US_KR_3Y, frequency = 252, start = c(year(min(df$Date)), month(min(df$Date))), end = c(year(max(df$Date)), month(max(df$Date))))
yield_3y_ts_diff <- diff(yield_3y_ts,differences = 2)

ggAcf(yield_3y_ts_diff,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of USA & South Korea yield spread rate(3Y)") +
  theme_bw()

Code
ggPacf(yield_3y_ts_diff,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of USA & South Korea yield spread rate(3Y)") +
  theme_bw()

Code
yield_10y_ts<- ts(df$US_KR_10Y, frequency = 252, start = c(year(min(df$Date)), month(min(df$Date))), end = c(year(max(df$Date)), month(max(df$Date))))
yield_10y_ts_diff <- diff(yield_10y_ts,differences = 2)

ggAcf(yield_10y_ts_diff,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of USA & South Korea yield spread rate(10Y)") +
  theme_bw()

Code
ggPacf(yield_10y_ts_diff,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of USA & South Korea yield spread rate(10Y)") +
  theme_bw()

Code
house_ts<- ts(housing$housing, frequency = 12, start = c(year(min(housing$Date)), month(min(housing$Date))), end = c(year(max(housing$Date)), month(max(housing$Date))))
house_dlog <- diff(house_ts,differences = 2)

ggAcf(house_dlog) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of Seoul housing  property index ") +
  theme_bw()

Code
ggPacf(house_dlog) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of Seoul housing  property index ") +
  theme_bw()

Code
adf.test(house_dlog)

    Augmented Dickey-Fuller Test

data:  house_dlog
Dickey-Fuller = -7.8107, Lag order = 6, p-value = 0.01
alternative hypothesis: stationary
Code
usd_ts <- ts(usd$usd_index, start = c(year(min(usd$Date)), month(min(usd$Date))), end = c(year(max(usd$Date)), month(max(usd$Date))), frequency = 252)
usd_log  <- log(usd_ts)
usd_dlog <- diff(usd_log,diff = 2) 

ggAcf(usd_dlog,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of USD index") +
  theme_bw()

Code
ggPacf(usd_dlog,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of USD index") +
  theme_bw()

Code
kospi_ts <- ts(kospi$Close, start = c(year(min(kospi$Date)), month(min(kospi$Date))), end = c(year(max(kospi$Date)), month(max(kospi$Date))), frequency = 250)
kospi_log  <- log(kospi_ts)
kospi_dlog <- diff(kospi_log,differences = 2) 

ggAcf(kospi_dlog,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of KOSPI index") +
  theme_bw()

Code
ggPacf(kospi_dlog,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of KOSPI index") +
  theme_bw()

Code
sp_ts <- ts(sp500$Close, start = c(year(min(sp500$Date)), month(min(sp500$Date))), end = c(year(max(sp500$Date)), month(max(sp500$Date))), frequency = 252)
sp_log  <- log(sp_ts)
sp_dlog <- diff(sp_log,differences = 2) 

ggAcf(sp_dlog,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of S&P500 index") +
  theme_bw()

Code
ggPacf(sp_dlog,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced (2): PACF of S&P500 index") +
  theme_bw()

In the previous EDA section, we examined ACF & PACF plot before and after differencing. before starting univariate time series analysis, it is important whether differenced result is satisfied for choosing parameter for univariate time series analysis.

Table above is a ACF & PACF plot for differencing, but this time, using second order differencing. In the EDA section, seoul housing index was still suspected non-stationary after first differencing, and in the second differencing,it seems it gives different result. The ACF shows small correlations scattered throughout the lags, with most staying within the confidence bounds. There are some noticeable spikes at lags around 10, 12-13, 16, 18, and 21, but these are relatively modest in magnitude (mostly under 0.15). The overall pattern shows no strong persistence or decay structure. The PACF displays a similar pattern with scattered small spikes, with a notable negative spike at lag 12 (approximately -0.20) and positive spikes at lags 16 and 18. Most other lags remain within the confidence bounds.So second-order differenced series appears to be stationary.

although ADF test result and ACF & PACF plot tells first order differencing(for some variables, log transformation was also done) is enough,except seoul housing index, to make firm decision on parameters, this table has been added. from the plots, it seems that taking second order differencing doesn’t improve significantly for other variables, so for univariate time series, first order differencing will be used for determining model parameters.

Based on the result, next step is to find model selection and its parameter. Table below are the parameters will be used for search algorithm for each data.

Model choices

Model & Parameter selection (USD, KOSPI, and S&P500 are log-transformed before differencing.)
Series Model parameters
South Korea base rate ARIMA p = 0,1 d = 1 ,q = 0,1
FED effective rate ARIMA p = 0,1,2 , d = 0,1 , q = 0,1,2
KRW/USD FX rate ARIMA p = 0,1 d = 0,1 q = 0,1
Yield spread 3Y (US–KR) ARIMA p = 0,1 d = 1 q = 0,1
Yield spread 10Y (US–KR) ARIMA p = 0,1 d = 1 q = 0,1
USD index (log) ARIMA p = 0,1 d = 1, , q = 0,1
KOSPI index (log) ARIMA p = 0,1 d = 1,q = 0,1,2
S&P 500 index (log) ARIMA p = 0,1 d = 0,1, q = 0,1
trade export KOR - USA SARIMA p = 0,1,d = 0,1 q = 0,1, P = 0,1,2,3 D = 0,1,Q = 0,1 , s= 12
trade import KOR -USA SARIMA p = 0,1,d = 0,1 q = 0,1, P = 0,1,2 D = 0,1,Q = 0,1, s = 12
Seoul housing property index SARIMA p =0,1 ,d = 0,1,2 q = 0,1,2 P = 0,1, D = 0,1 ,Q = 0,1,2 ,s = 12

Model selection

ARIMA utility function

Code
library(kableExtra)

arima_grid <- function(ts_obj,
                           p_set, d_set, q_set,
                           caption1 = NULL,
                           caption2 = "Comparison of ARIMA Models") {
  ARIMA_fit <- list()
  
  n <- length(p_set) * length(d_set) * length(q_set)

  results_matrix <- matrix(NA_real_, nrow = n, ncol = 6)
  
  cc <- 1
  
  for (d in d_set) {
    for (p in p_set) {
      for (q in q_set) {
        model <- Arima(ts_obj, order = c(p, d, q), include.drift = (d > 0))
        ARIMA_fit[[cc]] <- model
        results_matrix[cc, ] <- c(p, d, q, model$aic, model$bic, model$aicc)
        cc <- cc + 1
      }
    }
  }
  
  results_df <- as.data.frame(results_matrix)
  colnames(results_df) <- c("p","d","q","AIC","BIC","AICc")
  

  highlight_row <- which.min(results_df$AIC)
  
cap <- if (!is.null(caption1) && nzchar(caption1)) {
    sprintf("%s : %s", caption1, caption2)
  } else {
    caption
  }
  result_table<-knitr::kable(results_df, align = 'c', caption = cap) %>%
    kable_styling(full_width = FALSE, position = "center") %>%
  row_spec(highlight_row, bold = TRUE, background = "#FFFF99")
  
  list(
    table = result_table,
    best_model = ARIMA_fit[[highlight_row]],
    best_row = results_df[highlight_row, , drop = FALSE]
  )
  

}
Code
fit_kor_base <- arima_grid(
  ts_obj = kr_ts,
  p_set = c(0,1), d_set = c(1), q_set = c(0,1),
  caption1 = "South Korea Base Rate "
)
fit_kor_base$table
South Korea Base Rate : Comparison of ARIMA Models
p d q AIC BIC AICc
0 1 0 -241191.8 -241177.2 -241191.8
0 1 1 -241192.7 -241170.7 -241192.7
1 1 0 -241192.6 -241170.7 -241192.6
1 1 1 -241190.7 -241161.4 -241190.7
Code
print(fit_kor_base$best_model)
Series: ts_obj 
ARIMA(0,1,1) with drift 

Coefficients:
         ma1  drift
      0.0161      0
s.e.  0.0095      0

sigma^2 = 2.116e-11:  log likelihood = 120599.3
AIC=-241192.7   AICc=-241192.7   BIC=-241170.7

Model : ARIMA(0,1,1)

\[ Equation: x_t = 1.0000x_{t-1} + w_t + 0.0161w_{t-1} \]

Code
fit_usr <- arima_grid(
  ts_obj = usr_ts,
  p_set = c(0,1), d_set = c(1), q_set = c(0,1,2),
  caption1 = "FED rate"
)
fit_usr$table
FED rate : Comparison of ARIMA Models
p d q AIC BIC AICc
0 1 0 143.29499 148.69595 143.40714
0 1 1 97.44701 105.54845 97.67342
0 1 2 86.22130 97.02322 86.60225
1 1 0 71.56170 79.66314 71.78811
1 1 1 73.34440 84.14632 73.72535
1 1 2 74.57780 88.08020 75.15472
Code
print(fit_usr$best_model)
Series: ts_obj 
ARIMA(1,1,0) with drift 

Coefficients:
         ar1   drift
      0.6987  0.0229
s.e.  0.0674  0.1008

sigma^2 = 0.1076:  log likelihood = -32.78
AIC=71.56   AICc=71.79   BIC=79.66

Model: ARIMA(1,1,0)

\[ Equation:\; x_t = x_{t-1} + 0.6987(x_{t-1} - x_{t-2}) + 0.0229 + w_t \]

Code
fit_kr <- arima_grid(
  ts_obj = kr_ts,
  p_set = c(0,1), d_set = c(0,1), q_set = c(0,1),
  caption1 = "KRW/USD FX rate"
)
fit_kr$table
KRW/USD FX rate : Comparison of ARIMA Models
p d q AIC BIC AICc
0 0 0 -154525.2 -154510.6 -154525.2
0 0 1 -169644.4 -169622.5 -169644.4
1 0 0 -241202.2 -241180.3 -241202.2
1 0 1 -241203.2 -241173.9 -241203.2
0 1 0 -241191.8 -241177.2 -241191.8
0 1 1 -241192.7 -241170.7 -241192.7
1 1 0 -241192.6 -241170.7 -241192.6
1 1 1 -241190.7 -241161.4 -241190.7
Code
print(fit_kr$best_model)
Series: ts_obj 
ARIMA(1,0,1) with non-zero mean 

Coefficients:
         ar1     ma1    mean
      0.9997  0.0165  0.0011
s.e.  0.0001  0.0095  0.0001

sigma^2 = 2.116e-11:  log likelihood = 120605.6
AIC=-241203.1   AICc=-241203.1   BIC=-241173.9

Model: ARIMA(1,0,1)

\[ Equation:\; x_t = 0.9997x_{t-1} + w_t + 0.0165w_{t-1} + 0.0011 \]

Code
fit_yield_3y <- arima_grid(
  ts_obj = yield_3y_ts,
  p_set = c(0,1), d_set = c(0,1), q_set = c(0,1),
  caption1 = "USA & South Korea yield spread rate(3Y)"
)
fit_yield_3y$table
USA & South Korea yield spread rate(3Y) : Comparison of ARIMA Models
p d q AIC BIC AICc
0 0 0 21568.17 21581.66 21568.17
0 0 1 13436.22 13456.46 13436.22
1 0 0 -13412.80 -13392.55 -13412.79
1 0 1 -13481.31 -13454.32 -13481.30
0 1 0 -13411.85 -13398.35 -13411.85
0 1 1 -13481.68 -13461.44 -13481.68
1 1 0 -13481.41 -13461.17 -13481.41
1 1 1 -13479.69 -13452.69 -13479.68
Code
print(fit_yield_3y$best_model)
Series: ts_obj 
ARIMA(0,1,1) with drift 

Coefficients:
          ma1   drift
      -0.1069  -2e-04
s.e.   0.0125   9e-04

sigma^2 = 0.00688:  log likelihood = 6743.84
AIC=-13481.68   AICc=-13481.68   BIC=-13461.44

Model : ARIMA(0,1,1)

\[ Equation:\; x_t = x_{t-1} - 0.0002 + w_t - 0.1069w_{t-1} \]

Code
fit_yield_10y <- arima_grid(
  ts_obj = yield_10y_ts,
  p_set = c(0,1), d_set = c(0,1), q_set = c(0,1),
  caption1 = "USA & South Korea yield spread rate(10Y)"
)

fit_yield_10y$table
USA & South Korea yield spread rate(10Y) : Comparison of ARIMA Models
p d q AIC BIC AICc
0 0 0 17508.529 17522.026 17508.531
0 0 1 9586.314 9606.558 9586.318
1 0 0 -13233.117 -13212.872 -13233.113
1 0 1 -13280.071 -13253.079 -13280.065
0 1 0 -13226.383 -13212.887 -13226.382
0 1 1 -13275.509 -13255.265 -13275.505
1 1 0 -13273.890 -13253.646 -13273.886
1 1 1 -13274.976 -13247.985 -13274.970
Code
print(fit_yield_10y$best_model)
Series: ts_obj 
ARIMA(1,0,1) with non-zero mean 

Coefficients:
         ar1      ma1     mean
      0.9968  -0.0896  -0.7027
s.e.  0.0010   0.0128   0.2928

sigma^2 = 0.0071:  log likelihood = 6644.04
AIC=-13280.07   AICc=-13280.06   BIC=-13253.08

Model : ARIMA(1,0,1)

\[ Equation:\; x_t = 0.9968x_{t-1} + w_t - 0.0896w_{t-1} - 0.7027 \]

Code
fit_usd_index <- arima_grid(
  ts_obj = usd_log,
  p_set = c(0,1), d_set = c(0,1), q_set = c(0,1),
  caption1 = "USD Index (log)"
)
fit_usd_index$table
USD Index (log) : Comparison of ARIMA Models
p d q AIC BIC AICc
0 0 0 -8748.041 -8733.412 -8748.04
0 0 1 -23691.860 -23669.917 -23691.86
1 0 0 -87836.726 -87814.783 -87836.72
1 0 1 -87835.031 -87805.774 -87835.03
0 1 0 -87836.611 -87821.982 -87836.61
0 1 1 -87834.945 -87813.003 -87834.94
1 1 0 -87834.959 -87813.016 -87834.96
1 1 1 -87832.953 -87803.696 -87832.95
Code
print(fit_usd_index$best_model)
Series: ts_obj 
ARIMA(1,0,0) with non-zero mean 

Coefficients:
         ar1    mean
      0.9997  4.5713
s.e.  0.0003  0.1323

sigma^2 = 2.132e-05:  log likelihood = 43921.36
AIC=-87836.73   AICc=-87836.72   BIC=-87814.78

Model: ARIMA(1,0,0)

\[ Equation:\; x_t = 0.9997x_{t-1} + 4.5713 + w_t \]

Code
fit_sp500 <- arima_grid(
  ts_obj = sp_log,
  p_set = c(0,1), d_set = c(0,1), q_set = c(0,1),
  caption1 = "S&P 500 Index (log)"
)
fit_sp500$table
S&P 500 Index (log) : Comparison of ARIMA Models
p d q AIC BIC AICc
0 0 0 19265.665 19279.837 19265.667
0 0 1 7232.505 7253.763 7232.508
1 0 0 -53976.415 -53955.158 -53976.413
1 0 1 -54038.354 -54010.010 -54038.349
0 1 0 -53988.748 -53974.576 -53988.747
0 1 1 -54042.503 -54021.245 -54042.500
1 1 0 -54041.343 -54020.086 -54041.341
1 1 1 -54041.223 -54012.879 -54041.218
Code
print(fit_sp500$best_model)
Series: ts_obj 
ARIMA(0,1,1) with drift 

Coefficients:
          ma1  drift
      -0.0802  3e-04
s.e.   0.0107  1e-04

sigma^2 = 0.0001285:  log likelihood = 27024.25
AIC=-54042.5   AICc=-54042.5   BIC=-54021.25

Model: ARIMA(0,1,1)

\[ Equation:\; x_t = x_{t-1} + 0.0003 + w_t - 0.0802w_{t-1} \]

Code
fit_kospi <- arima_grid(
  ts_obj = kospi_log,
  p_set = c(0,1), d_set = c(0,1), q_set = c(0,1,2),
  caption1 = "KOSPI Index (log)"
)

fit_kospi$table
KOSPI Index (log) : Comparison of ARIMA Models
p d q AIC BIC AICc
0 0 0 12326.432 12340.209 12326.434
0 0 1 2713.885 2734.551 2713.888
0 0 2 -5173.461 -5145.906 -5173.455
1 0 0 -32793.924 -32773.258 -32793.921
1 0 1 -32796.920 -32769.365 -32796.914
1 0 2 -32798.031 -32763.588 -32798.023
0 1 0 -32794.538 -32780.761 -32794.536
0 1 1 -32797.341 -32776.676 -32797.338
0 1 2 -32798.614 -32771.060 -32798.609
1 1 0 -32797.136 -32776.470 -32797.132
1 1 1 -32797.872 -32770.318 -32797.866
1 1 2 -32796.608 -32762.166 -32796.600
Code
print(fit_kospi$best_model)
Series: ts_obj 
ARIMA(0,1,2) with drift 

Coefficients:
         ma1      ma2  drift
      0.0258  -0.0214  0e+00
s.e.  0.0117   0.0118  3e-04

sigma^2 = 0.0006338:  log likelihood = 16403.31
AIC=-32798.61   AICc=-32798.61   BIC=-32771.06

Model : ARIMA(0,1,2)

\[ Equation:\; x_t = x_{t-1} + 0.0000 + w_t + 0.0258w_{t-1} - 0.0214w_{t-2} \]

SARIMA Model

ACF & PACF plot for seasonal data

Code
import_diff_seasonal <- diff(import_ts,lag = 12)

ggAcf(import_diff_seasonal) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(seasonal): ACF of trade import South Korea vs USA") +
  theme_bw()

Code
ggPacf(import_diff_seasonal) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(seasonal): PACF of trade import South Korea vs USA") +
  theme_bw()

Code
export_diff_seasonal <- diff(export_ts,lag = 12)

ggAcf(export_diff_seasonal) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(seasonal): ACF of trade export South Korea vs USA") +
  theme_bw()

Code
ggPacf(export_diff_seasonal) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(seasonal): PACF of trade export South Korea vs USA") +
  theme_bw()

Code
house_diff_seasonal <- diff(house_ts,lag = 12)

ggAcf(house_diff_seasonal) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(seasonal): ACF of Seoul housing  property index ") +
  theme_bw()

Code
ggPacf(house_diff_seasonal) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(seasonal): PACF of Seoul housing  property index ") +
  theme_bw()

SARIMA utility function

Code
library(kableExtra)

sarima_grid <- function(ts_obj,
                            p_set, d_set, q_set,
                            P_set, D_set, Q_set, s,
                            caption_prefix = NULL,
                            caption_suffix = "Comparison of SARIMA Model"){
  SARIMA_fit <- list()
  n <- length(p_set) * length(d_set) * length(q_set) *
       length(P_set) * length(D_set) * length(Q_set)
  results_matrix <- matrix(NA_real_, nrow = n, ncol = 9) 
  cc <- 1


  for (d in d_set) {
    for (p in p_set) {
      for (q in q_set) {
        for (P in P_set) {
          for (D in D_set) {
            for (Q in Q_set) {
              model <- Arima(ts_obj,
                             order   = c(p, d, q),
                             seasonal = list(order = c(P, D, Q), period = s),
                              method = "ML",             
          transform.pars = TRUE )
              SARIMA_fit[[cc]] <- model
              results_matrix[cc, ] <- c(p, d, q, P, D, Q, model$aic, model$bic, model$aicc)
              cc <- cc + 1
            }
          }
        }
      }
    }
  }

  results_df <- as.data.frame(results_matrix)
  colnames(results_df) <- c("p","d","q","P","D","Q","AIC","BIC","AICc")
  highlight_row <- which.min(results_df$AIC)

  cap <- if (!is.null(caption_prefix) && nzchar(caption_prefix)) {
    sprintf("%s : %s", caption_prefix, caption_suffix)
  } else {
    caption_suffix
  }

  result_table1 <-knitr::kable(results_df, align = 'c', caption = cap) %>%
  kableExtra::kable_styling(full_width = FALSE, position = "center") %>%
  kableExtra::row_spec(highlight_row, bold = TRUE, background = "#FFFF99")
  

 list(
    table = result_table1,
    best_model = SARIMA_fit[[highlight_row]],
    best_row = results_df[highlight_row, , drop = FALSE]
  )
  

}
Code
fit_import <- sarima_grid(
  ts_obj = import_ts,
  p_set = c(0,1), d_set = c(1), q_set = c(0,1,2),
  P_set = c(0,1), D_set = c(1), Q_set = c(0,1), s = 12,
  caption_prefix = "Trade import South korea vs USA "
)
fit_import$table
Trade import South korea vs USA : Comparison of SARIMA Model
p d q P D Q AIC BIC AICc
0 1 0 0 1 0 11045.46 11049.40 11045.47
0 1 0 0 1 1 10847.43 10855.30 10847.46
0 1 0 1 1 0 10970.25 10978.13 10970.29
0 1 0 1 1 1 10849.13 10860.94 10849.19
0 1 1 0 1 0 10973.83 10981.71 10973.86
0 1 1 0 1 1 10755.51 10767.32 10755.57
0 1 1 1 1 0 10882.42 10894.24 10882.49
0 1 1 1 1 1 10757.16 10772.91 10757.27
0 1 2 0 1 0 10973.53 10985.34 10973.59
0 1 2 0 1 1 10752.03 10767.78 10752.13
0 1 2 1 1 0 10871.81 10887.56 10871.91
0 1 2 1 1 1 10753.11 10772.79 10753.27
1 1 0 0 1 0 10978.12 10985.99 10978.15
1 1 0 0 1 1 10760.13 10771.94 10760.19
1 1 0 1 1 0 10880.65 10892.46 10880.71
1 1 0 1 1 1 10761.37 10777.12 10761.47
1 1 1 0 1 0 10973.73 10985.54 10973.80
1 1 1 0 1 1 10752.62 10768.37 10752.72
1 1 1 1 1 0 10875.74 10891.49 10875.84
1 1 1 1 1 1 10753.78 10773.47 10753.94
1 1 2 0 1 0 10975.53 10991.28 10975.64
1 1 2 0 1 1 10753.95 10773.63 10754.11
1 1 2 1 1 0 10873.77 10893.46 10873.93
1 1 2 1 1 1 10754.99 10778.61 10755.21
Code
print(fit_import$best_model)
Series: ts_obj 
ARIMA(0,1,2)(0,1,1)[12] 

Coefficients:
          ma1     ma2     sma1
      -0.5291  0.1363  -0.9351
s.e.   0.0496  0.0574   0.0363

sigma^2 = 1.132e+11:  log likelihood = -5372.01
AIC=10752.03   AICc=10752.13   BIC=10767.78

Model : SARIMA(0,1,2)(0,1,1)[12]

\[ Equation:\; (1 - B)(1 - B^{12})x_t = (1 - 0.5291B + 0.1363B^2)(1 - 0.9351B^{12})w_t \]

Code
fit_export<- sarima_grid(
  ts_obj = export_ts,
  p_set = c(0), d_set = c(0,1), q_set = c(0,1,2,3),
  P_set = c(0), D_set = c(0,1), Q_set = c(0,1), s = 12,
  caption_prefix = " Trade export South korea vs USA  "
)
fit_export$table
Trade export South korea vs USA : Comparison of SARIMA Model
p d q P D Q AIC BIC AICc
0 0 0 0 0 0 12668.68 12676.62 12668.71
0 0 0 0 0 1 12304.24 12316.16 12304.31
0 0 0 0 1 0 11369.13 11373.07 11369.14
0 0 0 0 1 1 11371.12 11379.00 11371.15
0 0 1 0 0 0 12260.71 12272.62 12260.77
0 0 1 0 0 1 11958.80 11974.69 11958.90
0 0 1 0 1 0 11231.46 11239.34 11231.50
0 0 1 0 1 1 11226.34 11238.16 11226.40
0 0 2 0 0 0 12006.81 12022.70 12006.92
0 0 2 0 0 1 11812.77 11832.62 11812.92
0 0 2 0 1 0 11182.90 11194.72 11182.97
0 0 2 0 1 1 11168.91 11184.67 11169.01
0 0 3 0 0 0 11890.94 11910.79 11891.09
0 0 3 0 0 1 11712.12 11735.95 11712.34
0 0 3 0 1 0 11141.07 11156.83 11141.17
0 0 3 0 1 1 11102.09 11121.79 11102.25
0 1 0 0 0 0 11451.98 11455.95 11451.99
0 1 0 0 0 1 11397.51 11405.44 11397.54
0 1 0 0 1 0 11150.77 11154.71 11150.78
0 1 0 0 1 1 10997.66 11005.54 10997.70
0 1 1 0 0 0 11350.69 11358.63 11350.72
0 1 1 0 0 1 11302.46 11314.37 11302.52
0 1 1 0 1 0 11062.72 11070.60 11062.75
0 1 1 0 1 1 10897.91 10909.72 10897.97
0 1 2 0 0 0 11348.22 11360.13 11348.28
0 1 2 0 0 1 11302.08 11317.95 11302.18
0 1 2 0 1 0 11064.72 11076.53 11064.78
0 1 2 0 1 1 10899.82 10915.57 10899.93
0 1 3 0 0 0 11340.78 11356.65 11340.88
0 1 3 0 0 1 11297.90 11317.74 11298.05
0 1 3 0 1 0 11065.89 11081.64 11065.99
0 1 3 0 1 1 10901.50 10921.19 10901.66
Code
print(fit_export$best_model)
Series: ts_obj 
ARIMA(0,1,1)(0,1,1)[12] 

Coefficients:
          ma1     sma1
      -0.5536  -0.8441
s.e.   0.0431   0.0329

sigma^2 = 1.711e+11:  log likelihood = -5445.96
AIC=10897.91   AICc=10897.97   BIC=10909.72

Model : SARIMA(0,1,1)(0,1,1)[12]

\[ Equation:\; (1 - B)(1 - B^{12})x_t = (1 - 0.5536B)(1 - 0.8441B^{12})w_t \]

Code
fit_housing <- sarima_grid(
  ts_obj = house_ts,
  p_set = c(0,1), d_set = c(1), q_set = c(0,1,2),
  P_set = c(0,1), D_set = c(1), Q_set = c(0,1,2), s = 12,
  caption_prefix = "seoul housing property index"
)
fit_housing$table
seoul housing property index : Comparison of SARIMA Model
p d q P D Q AIC BIC AICc
0 1 0 0 1 0 573.71724 577.2307 573.73350
0 1 0 0 1 1 462.76877 469.7956 462.81775
0 1 0 0 1 2 459.44148 469.9818 459.53984
0 1 0 1 1 0 546.33627 553.3631 546.38525
0 1 0 1 1 1 461.20621 471.7465 461.30457
0 1 0 1 1 2 457.73367 471.7874 457.89828
0 1 1 0 1 0 368.36573 375.3926 368.41471
0 1 1 0 1 1 260.21272 270.7530 260.31108
0 1 1 0 1 2 256.58723 270.6409 256.75184
0 1 1 1 1 0 341.54235 352.0826 341.64071
0 1 1 1 1 1 258.35713 272.4108 258.52174
0 1 1 1 1 2 255.67088 273.2380 255.91881
0 1 2 0 1 0 283.99217 294.5325 284.09053
0 1 2 0 1 1 166.55946 180.6132 166.72406
0 1 2 0 1 2 166.16369 183.7308 166.41162
0 1 2 1 1 0 247.83302 261.8867 247.99763
0 1 2 1 1 1 166.73823 184.3054 166.98616
0 1 2 1 1 2 166.66612 187.7467 167.01466
1 1 0 0 1 0 225.98778 233.0146 226.03676
1 1 0 0 1 1 97.72783 108.2681 97.82619
1 1 0 0 1 2 98.90601 112.9597 99.07062
1 1 0 1 1 0 184.13540 194.6757 184.23376
1 1 0 1 1 1 99.11640 113.1701 99.28101
1 1 0 1 1 2 98.96439 116.5315 99.21232
1 1 1 0 1 0 210.49837 221.0387 210.59673
1 1 1 0 1 1 87.26888 101.3226 87.43349
1 1 1 0 1 2 87.86554 105.4327 88.11348
1 1 1 1 1 0 170.61942 184.6731 170.78403
1 1 1 1 1 1 88.20233 105.7695 88.45027
1 1 1 1 1 2 88.56451 109.6451 88.91306
1 1 2 0 1 0 207.31033 221.3640 207.47494
1 1 2 0 1 1 84.22789 101.7950 84.47582
1 1 2 0 1 2 84.72039 105.8010 85.06893
1 1 2 1 1 0 167.49651 185.0637 167.74445
1 1 2 1 1 1 85.07630 106.1569 85.42485
1 1 2 1 1 2 85.51716 110.1112 85.98383
Code
print(fit_housing$best_model)
Series: ts_obj 
ARIMA(1,1,2)(0,1,1)[12] 

Coefficients:
         ar1     ma1     ma2     sma1
      0.7799  0.2868  0.1734  -0.9998
s.e.  0.0508  0.0727  0.0752   0.1302

sigma^2 = 0.06895:  log likelihood = -37.11
AIC=84.23   AICc=84.48   BIC=101.8

Model : SARIMA(1,1,2)(0,1,1)[12]

\[ Equation:\; (1 - 0.7799B)(1 - B)(1 - B^{12})x_t = (1 + 0.2868B + 0.1734B^2)(1 - 0.9998B^{12})w_t \]

Model choice(with equation)

  • South korea base rate

Model : ARIMA(0,1,1)

\[ Equation: x_t = 1.0000x_{t-1} + w_t + 0.0161w_{t-1} \]

  • Federal reserve rate

Model: ARIMA(1,1,0)

\[ Equation:\; x_t = x_{t-1} + 0.6987(x_{t-1} - x_{t-2}) + 0.0229 + w_t \]

  • KRW/USD FX rate

Model: ARIMA(1,0,1)

\[ Equation:\; x_t = 0.9997x_{t-1} + w_t + 0.0165w_{t-1} + 0.0011 \]

  • USA & South Korea yield spread rate(10Y)

Model : ARIMA(1,0,1)

\[ Equation:\; x_t = 0.9968x_{t-1} + w_t - 0.0896w_{t-1} - 0.7027 \]

  • USD index

Model: ARIMA(1,0,0)

\[ Equation:\; x_t = 0.9997x_{t-1} + 4.5713 + w_t \]

  • S&P500 index

Model: ARIMA(0,1,1)

\[ Equation:\; x_t = x_{t-1} + 0.0003 + w_t - 0.0802w_{t-1} \]

  • KOSPI index

Model : ARIMA(0,1,2)

\[ Equation:\; x_t = x_{t-1} + 0.0000 + w_t + 0.0258w_{t-1} - 0.0214w_{t-2} \]

  • Trade import South korea vs USA Model

SARIMA(0,1,2)(0,1,1)[12]

\[ Equation:\; (1 - B)(1 - B^{12})x_t = (1 - 0.5291B + 0.1363B^2)(1 - 0.9351B^{12})w_t \]

  • Trade import South korea vs USA

Model : SARIMA(0,1,1)(0,1,1)[12]

\[ Model : SARIMA(0,1,1)(0,1,1)[12] \ Equation:\; (1 - B)(1 - B^{12})x_t = (1 - 0.5536B)(1 - 0.8441B^{12})w_t \]

  • Seoul housing property index

Model : SARIMA(1,1,2)(0,1,1)[12]

\[ Equation:\; (1 - 0.7799B)(1 - B)(1 - B^{12})x_t = (1 + 0.2868B + 0.1734B^2)(1 - 0.9998B^{12})w_t \]

Model diagnostics

Code
bok_model<- capture.output(sarima(bok_ts, 0,1,1))

The residual diagnostics for the ARIMA(0,1,1) model indicate a fairly good fit. The standardized residuals fluctuate randomly around zero, suggesting the mean of the residuals is stable over time. However there are outlier times for example around 2019 and 2025 Q1. The ACF plot shows no significant spikes beyond the 95% confidence limits, implying that the residuals are uncorrelated and behave like white noise. The Q–Q plot of standardized residuals is mostly linear, though with slight deviations at the tails, indicating mild non-normality. The Ljung–Box test p-values remain above 0.05 across lags, confirming that there is no significant autocorrelation left in the residuals.

Code
usr_model<- capture.output(sarima(usr_ts, 1, 1, 0))

The residuals from the ARIMA(1,1,0) model are well-behaved and largely centered around zero, with only a few spikes visible around 2009 and 2020. The ACF plot reveals no substantial autocorrelation at any lag, supporting the conclusion that the residuals are random and independent. The Q–Q plot demonstrates a near-linear relationship between sample and theoretical quantiles, except for small departures at both ends of the distribution, which suggests slight heavy-tailed behavior. The Ljung–Box p-values mostly exceed 0.05, indicating the residuals are effectively white noise. This model performs well and adequately represents the time series dynamics, with minimal room for improvement.

Code
kr_model<- capture.output(sarima(kr_ts, 1,0,1))

For the ARMA(1,0,1) model, the residuals fluctuate around zero but exhibit slightly higher volatility than desired, particularly during certain time intervals. The ACF of the residuals shows weak but visible autocorrelation at the first few lags. The Q–Q plot displays substantial deviation from the 45-degree line in the tails, implying the presence of heavy tails and possible outliers, showing non-linearity. The Ljung–Box test results show a few p-values below the 0.05 threshold, which further indicates some remaining structure in the residuals. In summary, while the model fits decently, it may not fully capture the short-term autocorrelation and could benefit from a slightly more complex specification.

Code
import_model<- capture.output(sarima(import_ts, 0,1,2,0,1,1,12))

The SARIMA(0,1,2)×(0,1,1)[12] model shows residuals that are roughly centered around zero, though some mild volatility clustering is visible toward the end of the series. The ACF plot exhibits small but notable spikes at seasonal lags. The Q–Q plot suggests that the residuals in heavier tails at the extremes, shows non-linearity. The Ljung–Box test indicates that many p-values are below 0.05, suggesting the presence of lingering autocorrelation, indicating it is not fully capturing underlying structure of time series .

Code
export_model<- capture.output(sarima(export_ts, 0,1,1,0,1,1,12))

The SARIMA(0,1,1)×(0,1,1)[12] model shows residuals that are roughly centered around zero, though some mild volatility clustering is visible toward the end of the series. The ACF plot exhibits small but notable spikes at seasonal lags. The Q–Q plot suggests that the residuals in heavier tails at the extremes, shows non-linearity. The Ljung–Box test indicates that many p-values are below 0.05, suggesting the presence of lingering autocorrelation, indicating it is not fully capturing the underlying structure of time series .

Code
yield_3y_model<- capture.output(sarima(yield_3y_ts, 0, 1, 1))

The ARIMA(0,1,1) model shows residuals that are roughly centered around zero(with one outlier), showing no trend or nonstationarity. The ACF plot is nearly flat, with no significant spikes, suggesting residual independence. The Q–Q plot, however, reveals heavier lower tails, indicating a non-normal distribution. The Ljung–Box test p-values fluctuate around the 0.05 line but mostly remain above it, meaning there is no strong evidence of autocorrelation, it is best to interpret as weak autocorrelation.

Code
yield_10y_model<- capture.output(sarima(yield_10y_ts, 1, 0, 1))

The ARIMA(1,0,1) model shows residuals that are roughly centered around zero(with one outlier), showing no trend or nonstationarity. The ACF plot is nearly flat, with no significant spikes, suggesting residual independence. The Q–Q plot, however, reveals heavier lower tails, indicating a non-normal distribution. The Ljung–Box test p-values fluctuate around the 0.05 line but mostly remain above it, meaning there is no strong evidence of autocorrelation, it is best to interpret as weak autocorrelation.

Code
house_model<- capture.output(sarima(house_ts, 1, 1, 2,0,1,1,12))

The residual diagnostics for the SARIMA(1,1,2)×(0,1,1)[12] model indicate a strong overall fit. The standardized residuals are well-centered and show no visible trend or heteroskedasticity.However since there are fluctuations, it is possible to suspect the fit of the model. The ACF plot reveals that all autocorrelations fall within the confidence bounds, demonstrating that the residuals are uncorrelated. The Q–Q plot is slight deviations at the tails, close to normal, but non-normal. The Ljung–Box test p-values are all above 0.05, suggesting that there is no evidence of autocorrelation remaining.

Code
usd_model<- capture.output(sarima(usd_log, 1,0,0))

The ARIMA(1,0,0) model produces residuals that fluctuate around zero without clear patterns, indicating good fit of the model. The ACF of the residuals is nearly flat, with no significant spikes, indicating that the model successfully removed most autocorrelation. The Q–Q plot shows heavy tails and slight skewness, implying some non-normality. The Ljung–Box p-values are above 0.05 but close to 0.05, meaning there is weak sign of autocorrelation left in the residuals.

Code
kospi_model<- capture.output(sarima(kospi_log, 0,1,2))

The ARIMA(0,1,2) model demonstrates residuals that are mostly stable and centered around zero, with one outlier spike. The ACF plot indicates that residuals are largely uncorrelated, as no autocorrelation exceeds the 95% bounds. The Q–Q plot reveals noticeable deviations at the extremes, with heavy tails indicating non-normal residuals. Ljung–Box test results suggest that most p-values clearly are above 0.05, confirming no remaining autocorrelation.

Code
sp_model<- capture.output(sarima(sp_log, 0,1, 1))

The residual diagnostics for the ARIMA(0,1,1) model indicate that it fits the data reasonably well. The standardized residuals fluctuate randomly around zero with stable variance, though a few volatility spikes appear around 2008–2010 and after 2020. The ACF of residuals shows no significant autocorrelation, suggesting that the residuals behave like white noise. The Normal Q–Q plot is mostly linear but deviates slightly at the tails, indicating heavier tails and mild non-normality. The Ljung–Box p-values remain above 0.05 in the first few lags and are close to 0.05 or less ,autocorrelation exists in the residuals, suggesting the model has not fully captured the underlying structure of the time series.

AUTO.ARIMA

Code
auto.arima(bok_ts)
Series: bok_ts 
ARIMA(1,1,0) 

Coefficients:
         ar1
      0.3735
s.e.  0.0878

sigma^2 = 0.1112:  log likelihood = -34.84
AIC=73.68   AICc=73.79   BIC=79.08
Code
auto.arima(usr_ts)
Series: usr_ts 
ARIMA(1,1,0) 

Coefficients:
         ar1
      0.6988
s.e.  0.0675

sigma^2 = 0.1066:  log likelihood = -32.81
AIC=69.61   AICc=69.73   BIC=75.01
Code
auto.arima(kr_ts)
Series: kr_ts 
ARIMA(0,1,1) 

Coefficients:
         ma1
      0.0162
s.e.  0.0095

sigma^2 = 2.116e-11:  log likelihood = 120598.7
AIC=-241193.4   AICc=-241193.4   BIC=-241178.8
Code
auto.arima(import_ts)
Series: import_ts 
ARIMA(0,1,2)(0,0,1)[12] 

Coefficients:
          ma1     ma2    sma1
      -0.5337  0.1225  0.0936
s.e.   0.0498  0.0577  0.0508

sigma^2 = 1.218e+11:  log likelihood = -5543.73
AIC=11095.45   AICc=11095.56   BIC=11111.33
Code
auto.arima(export_ts)
Series: export_ts 
ARIMA(0,1,3)(0,0,1)[12] with drift 

Coefficients:
          ma1      ma2     ma3    sma1     drift
      -0.5359  -0.1579  0.1212  0.3427  20494.29
s.e.   0.0510   0.0560  0.0518  0.0528  12914.90

sigma^2 = 2.023e+11:  log likelihood = -5642.76
AIC=11297.51   AICc=11297.73   BIC=11321.32
Code
auto.arima(yield_3y_ts)
Series: yield_3y_ts 
ARIMA(0,1,1) 

Coefficients:
          ma1
      -0.1069
s.e.   0.0125

sigma^2 = 0.006879:  log likelihood = 6743.82
AIC=-13483.64   AICc=-13483.64   BIC=-13470.15
Code
auto.arima(yield_10y_ts)
Series: yield_10y_ts 
ARIMA(0,1,0) 

sigma^2 = 0.007165:  log likelihood = 6615.19
AIC=-13228.38   AICc=-13228.38   BIC=-13221.63
Code
auto.arima(house_ts)
Series: house_ts 
ARIMA(1,1,2)(0,0,2)[12] with drift 

Coefficients:
         ar1     ma1     ma2    sma1     sma2   drift
      0.7610  0.2853  0.1916  0.0853  -0.0875  0.1621
s.e.  0.0519  0.0721  0.0731  0.0628   0.0635  0.0987

sigma^2 = 0.06953:  log likelihood = -20.22
AIC=54.44   AICc=54.88   BIC=79.36
Code
auto.arima(usd_log)
Series: usd_log 
ARIMA(0,1,0) 

sigma^2 = 2.133e-05:  log likelihood = 43920.12
AIC=-87838.24   AICc=-87838.24   BIC=-87830.93
Code
auto.arima(kospi_log)
Series: kospi_log 
ARIMA(0,1,2) with drift 

Coefficients:
         ma1      ma2  drift
      0.0258  -0.0214  0e+00
s.e.  0.0117   0.0118  3e-04

sigma^2 = 0.0006338:  log likelihood = 16403.31
AIC=-32798.61   AICc=-32798.61   BIC=-32771.06
Code
auto.arima(sp_log)
Series: sp_log 
ARIMA(0,1,1) with drift 

Coefficients:
          ma1  drift
      -0.0802  3e-04
s.e.   0.0107  1e-04

sigma^2 = 0.0001285:  log likelihood = 27024.25
AIC=-54042.5   AICc=-54042.5   BIC=-54021.25

discrepancy between model selection process and auto.arima

Both model selection procedure and auto.arima() recommend models, but they weigh evidence differently, so agreement occurs only where the diagnostics are unequivocally clean. For the Federal Reserve rate, trade imports, KOSPI, and S&P 500, residual checks are tidy—flat residual ACFs, Ljung–Box p-values consistently above 0.05, and only mild Q–Q tail bends—so both methods converge on the same specifications. In contrast, several other series show diagnostic red flags that nudge the algorithms apart. The South Korea base rate looks broadly well-fit but exhibits outlier patterns that can distort parameter selection; the FX rate has two large residual outliers and Ljung–Box p-values often below 0.05, indicating unresolved autocorrelation; exports likewise show low Ljung–Box p-values and more volatile residuals than peer models; the 10-year yield spread has one large outlier and Ljung–Box p-values that are acceptable at short lags but approach or fall below 0.05 after roughly 10 lags, again signaling remaining dependence; the Seoul housing index displays notably volatile residuals; and the USD index shows low Ljung–Box p-values, confirming residual autocorrelation. In short, where diagnostics are clean, both methods agree; where outliers, volatility, or lingering autocorrelation appear, auto.arima() tends to prefer more parsimonious models, while our approach favors specifications that better purge residual structure—hence the differences.

Forecasting

Code
bok_fit <- Arima(bok_ts, order = c(0,1,1))
bok_forecast_result <- forecast(bok_fit, h = 20)
autoplot(bok_forecast_result) +
  labs(title = "ARIMA(0,1,1) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code
usr_fit <- Arima(usr_ts, order = c(1,1,0))
usr_forecast_result <- forecast(usr_fit, h = 20)
autoplot(usr_forecast_result) +
  labs(title = "ARIMA(1,1,0) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code
kr_fit <- Arima(kr_ts, order = c(1,0,1))
kr_forecast_result <- forecast(kr_fit, h = 1008)
autoplot(kr_forecast_result) +
  labs(title = "ARIMA(1,0,1) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code
import_fit <- Arima(import_ts, order = c(0,1,2), seasonal = list(order = c(0,1,1), period = 12))
import_forecast_result <- forecast(import_fit, h = 36,level = c(80, 95))
autoplot(import_forecast_result,PI= TRUE) +
  labs(title = " SARIMA(0,1,2)(0,1,1)[12] Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code
export_fit <- Arima(export_ts, order = c(0,1,1), seasonal = list(order = c(0,1,1), period = 12))
export_forecast_result <- forecast(export_fit, h = 36,level = c(80, 95))
autoplot(export_forecast_result,PI= TRUE) +
  labs(title = "Trade export: SARIMA(0,1,1)(0,1,1) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code
yield_3y_fit <- Arima(yield_3y_ts, order = c(0,1,1))
yield_3y_forecast_result <- forecast(yield_3y_fit, h = 1008)
autoplot(yield_3y_forecast_result) +
  labs(title = "ARIMA(0,1,1) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code
yield_10y_fit <- Arima(yield_10y_ts, order = c(1,0,1))
yield_10y_forecast_result <- forecast(yield_10y_fit, h = 1008)
autoplot(yield_10y_forecast_result) +
  labs(title = "ARIMA(1,0,1) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code
house_fit <- Arima(house_ts, order = c(1,1,2), seasonal = list(order = c(0,1,1), period = 12))
house_forecast_result <- forecast(house_fit, h = 36,level = c(80, 95))
autoplot(house_forecast_result,PI= TRUE) +
  labs(title = "SARIMA(1,1,2)(0,1,1)[12] Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code
usd_fit <- Arima(usd_log, order = c(1,0,0))
usd_forecast_result_usd <- forecast(usd_fit, h = 1008)

autoplot(usd_forecast_result_usd) +
  labs(title = "ARIMA(1,0,0) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal() 

Code
kospi_fit <- Arima(kospi_log, order = c(0,1,2))
kospi_forecast_result <- forecast(kospi_fit, h = 756)
autoplot(kospi_forecast_result) +
  labs(title = "ARIMA(0,1,2) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code
sp_fit <- Arima(sp_log, order = c(1,0,1))
sp_forecast_result <- forecast(sp_fit, h = 1008)
autoplot(sp_forecast_result) +
  labs(title = "ARIMA(1,0,1) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

The forecast plots for most series indicate that the models are performing well. The shaded gray bands represent confidence intervals—the ranges within which future values are likely to fall—and their gradual widening over the forecast horizon reflects increasing uncertainty as predictions extend further into the future. For policy rates, which are adjusted periodically rather than daily, short-term fluctuations are naturally limited. This stability is clearly visible in the plots, where the forecast lines remain relatively smooth and the confidence intervals stay narrow over shorter horizons, reflecting the steady nature of monetary policy adjustments and the slower dynamics of interest rate changes compared with high-frequency financial data.

Benchmark

plot utility function

Code
plot_util <- function(forecast_result, ts, h, fit) {
  print(accuracy(forecast_result))  
  autoplot(ts) +
    autolayer(meanf(ts, h = h), series = "Mean", PI = FALSE) +
    autolayer(naive(ts, h = h), series = "Naïve", PI = FALSE) +
    autolayer(snaive(ts, h = h), series = "SNaïve", PI = FALSE) +
    autolayer(rwf(ts, drift = TRUE, h = h), series = "Drift", PI = FALSE) +
    autolayer(forecast(fit, h = h), series = "Fit", PI = FALSE) +
    xlab("Date") + 
    ylab("Predicted Values") +
    guides(colour = guide_legend(title = "Forecast Methods")) +
    theme_minimal()
}
Code
bok_tsibble<-as_tsibble(bok_ts)
fit_bok <- bok_tsibble%>%
  model(
    Mean  = MEAN(value),
    Naive = NAIVE(value),
    SNaive = SNAIVE(value),
    Drift = RW(value ~ drift()),
    ARIMA_model = ARIMA(value ~ pdq(0,1,1))  
  )

bok_forecast <- forecast(fit_bok, h = 20)


autoplot(bok_forecast, bok_tsibble) +
  labs(x = "Quarter", y = "Predicted Values", colour = "Forecast Methods") +
    guides(colour = "none", linetype = "none") +  
  theme_minimal()

Code
acc_train <- accuracy(fit_bok) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)

h <- 20
n <- nrow(bok_tsibble)
bok_train <- bok_tsibble %>% dplyr::slice_head(n = n - h)
bok_test  <- bok_tsibble %>% dplyr::slice_tail(n = h)

fit_train <- bok_train %>%
  model(
    Mean        = MEAN(value),
    Naive       = NAIVE(value),
    SNaive      = SNAIVE(value),
    Drift       = RW(value ~ drift()),
    ARIMA_model = ARIMA(value ~ pdq(0,1,1))
  )

fc_test <- forecast(fit_train, h = h)

acc_test <- accuracy(fc_test, bok_test) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)
print(acc_test)
# A tibble: 5 × 6
  .model       RMSE   MAE  MAPE  MASE  ACF1
  <chr>       <dbl> <dbl> <dbl> <dbl> <dbl>
1 Mean         1.05 0.839  29.1   NaN 0.650
2 ARIMA_model  2.51 2.31   66.9   NaN 0.650
3 Naive        2.62 2.42   70.6   NaN 0.650
4 SNaive       2.84 2.66   79.3   NaN 0.622
5 Drift        3.11 2.88   84.1   NaN 0.683
Code
usr_tsibble<-as_tsibble(usr_ts)
fit_usr <- usr_tsibble%>%
  model(
    Mean  = MEAN(value),
    Naive = NAIVE(value),
    SNaive = SNAIVE(value),
    Drift = RW(value ~ drift()),
    ARIMA_model = ARIMA(value ~ pdq(1,1,0))  
  )

usr_forecast <- forecast(fit_usr, h = 20)


autoplot(usr_forecast, usr_tsibble) +
  labs(x = "Quarter", y = "Predicted Values", colour = "Forecast Methods") +
    guides(colour = "none", linetype = "none") +  
  theme_minimal()

Code
acc_train_usr <- accuracy(fit_usr) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)

h <- 20
n <- nrow(usr_tsibble)
usr_train <- usr_tsibble %>% dplyr::slice_head(n = n - h)
usr_test  <- usr_tsibble %>% dplyr::slice_tail(n = h)

fit_train_usr <- usr_train %>%
  model(
    Mean        = MEAN(value),
    Naive       = NAIVE(value),
    SNaive      = SNAIVE(value),
    Drift       = RW(value ~ drift()),
    ARIMA_model = ARIMA(value ~ pdq(0,1,1))
  )

fc_test_usr <- forecast(fit_train_usr, h = h)

acc_test_usr <- accuracy(fc_test_usr, usr_test) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)
print(acc_test_usr)
# A tibble: 5 × 6
  .model       RMSE   MAE  MAPE  MASE  ACF1
  <chr>       <dbl> <dbl> <dbl> <dbl> <dbl>
1 Mean         2.92  2.77 128.    NaN 0.706
2 Naive        4.44  4.18  94.6   NaN 0.706
3 SNaive       4.44  4.18  94.8   NaN 0.706
4 ARIMA_model  4.46  4.20  96.0   NaN 0.706
5 Drift        5.02  4.73 109.    NaN 0.715
Code
plot_util(kr_forecast_result, kr_ts, 756, kr_fit)
                       ME         RMSE          MAE          MPE     MAPE
Training set -4.83016e-08 4.599619e-06 1.730312e-06 -0.007368396 0.193281
                  MASE          ACF1
Training set 0.0256441 -0.0005579747

Code
import_tsibble<-as_tsibble(import_ts)
fit_import <- import_tsibble%>%
  model(
    Mean  = MEAN(value),
    Naive = NAIVE(value),
    SNaive = SNAIVE(value),
    Drift = RW(value ~ drift()),
    ARIMA_model = ARIMA(
      value ~ pdq(0,1,2) + PDQ(0,1,1, period = 12) + drift())  
  )

import_forecast <- forecast(fit_import, h = 36)


autoplot(import_forecast, import_tsibble) +
  labs(x = "Quarter", y = "Predicted Values", colour = "Forecast Methods") +
    guides(colour = "none", linetype = "none") +  
  theme_minimal()

Code
acc_train_import <- accuracy(fit_import) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)

h <- 36
n <- nrow(import_tsibble)
import_train <- import_tsibble %>% dplyr::slice_head(n = n - h)
import_test  <- import_tsibble %>% dplyr::slice_tail(n = h)

fit_train_import <- import_train %>%
  model(
    Mean        = MEAN(value),
    Naive       = NAIVE(value),
    SNaive      = SNAIVE(value),
    Drift       = RW(value ~ drift()),
    ARIMA_model = ARIMA(value ~ pdq(0,1,1))
  )

fc_test_import <- forecast(fit_train_import, h = h)

acc_test_import <- accuracy(fc_test_import, import_test) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)
print(acc_test_import)
# A tibble: 5 × 6
  .model          RMSE      MAE  MAPE  MASE   ACF1
  <chr>          <dbl>    <dbl> <dbl> <dbl>  <dbl>
1 SNaive       974714.  806253.  13.7   NaN 0.0806
2 ARIMA_model 1733375. 1618739.  27.8   NaN 0.368 
3 Naive       1807909. 1722708.  29.5   NaN 0.219 
4 Drift       2142301. 2045964.  34.9   NaN 0.342 
5 Mean        2931573. 2875077.  47.0   NaN 0.219 
Code
export_tsibble<-as_tsibble(export_ts)
fit_export <- export_tsibble%>%
  model(
    Mean  = MEAN(value),
    Naive = NAIVE(value),
    SNaive = SNAIVE(value),
    Drift = RW(value ~ drift()),
    ARIMA_model = ARIMA(
      value ~ pdq(0,1,1) + PDQ(0,1,1, period = 12) + drift())  
  )

export_forecast <- forecast(fit_export, h = 36)


autoplot(export_forecast, export_tsibble) +
  labs(x = "Quarter", y = "Predicted Values", colour = "Forecast Methods") +
    guides(colour = "none", linetype = "none") +  
  theme_minimal()

Code
acc_train_export <- accuracy(fit_export) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)

h <- 36
n <- nrow(export_tsibble)
export_train <- export_tsibble %>% dplyr::slice_head(n = n - h)
export_test  <- export_tsibble %>% dplyr::slice_tail(n = h)

fit_train_export <- export_train %>%
  model(
    Mean        = MEAN(value),
    Naive       = NAIVE(value),
    SNaive      = SNAIVE(value),
    Drift       = RW(value ~ drift()),
    ARIMA_model = ARIMA(value ~ pdq(0,1,1))
  )

fc_test_export <- forecast(fit_train_export, h = h)

acc_test_export <- accuracy(fc_test_export, export_test) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)
print(acc_test_export)
# A tibble: 5 × 6
  .model          RMSE      MAE  MAPE  MASE  ACF1
  <chr>          <dbl>    <dbl> <dbl> <dbl> <dbl>
1 ARIMA_model  755978.  598155.  6.08   NaN 0.269
2 Drift       1188767.  982148.  9.36   NaN 0.349
3 SNaive      1506568. 1234981. 11.9    NaN 0.555
4 Naive       1554453. 1314216. 12.5    NaN 0.492
5 Mean        6031747. 5963983. 59.2    NaN 0.492
Code
plot_util(yield_3y_forecast_result,yield_3y_ts,756, yield_3y_fit)
                        ME     RMSE      MAE MPE MAPE       MASE          ACF1
Training set -0.0001976061 0.082926 0.053429 NaN  Inf 0.06586083 -0.0002478776

Code
plot_util(yield_10y_forecast_result,yield_10y_ts,756, yield_10y_fit)
                       ME       RMSE        MAE MPE MAPE       MASE        ACF1
Training set 1.860283e-05 0.08423856 0.05456338 NaN  Inf 0.07908904 0.000947304

Code
house_tsibble<-as_tsibble(house_ts)
fit_house <- house_tsibble%>%
  model(
    Mean  = MEAN(value),
    Naive = NAIVE(value),
    SNaive = SNAIVE(value),
    Drift = RW(value ~ drift()),
    ARIMA_model = ARIMA(
      value ~ pdq(1,1,2) + PDQ(0,1,1, period = 12) + drift())  
  )

house_forecast <- forecast(fit_house, h = 36)


autoplot(house_forecast, house_tsibble) +
  labs(x = "Quarter", y = "Predicted Values", colour = "Forecast Methods") +
    guides(colour = "none", linetype = "none") +  
  theme_minimal()

Code
acc_train_house <- accuracy(fit_house) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)

h <- 36
n <- nrow(house_tsibble)
house_train <- house_tsibble %>% dplyr::slice_head(n = n - h)
house_test  <- house_tsibble %>% dplyr::slice_tail(n = h)

fit_train_house <- house_train %>%
  model(
    Mean        = MEAN(value),
    Naive       = NAIVE(value),
    SNaive      = SNAIVE(value),
    Drift       = RW(value ~ drift()),
    ARIMA_model = ARIMA(value ~ pdq(0,1,1))
  )

fc_test_house <- forecast(fit_train_house, h = h)

acc_test_house <- accuracy(fc_test_house, house_test) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)
print(acc_test_house)
# A tibble: 5 × 6
  .model       RMSE   MAE  MAPE  MASE  ACF1
  <chr>       <dbl> <dbl> <dbl> <dbl> <dbl>
1 SNaive       9.35  8.94  9.02   NaN 0.750
2 Naive        9.58  9.16  9.25   NaN 0.826
3 ARIMA_model 12.9  12.3  12.4    NaN 0.828
4 Drift       14.1  13.4  13.5    NaN 0.849
5 Mean        18.4  18.2  18.1    NaN 0.826
Code
plot_util(usd_forecast_result_usd, usd_log, 756, usd_fit)
                        ME        RMSE         MAE           MPE       MAPE
Training set -2.652266e-05 0.004617329 0.002876984 -0.0006821367 0.06296298
                   MASE         ACF1
Training set 0.04256685 -0.005426123

Code
plot_util(kospi_forecast_result,kospi_log, 756, kospi_fit)
                       ME       RMSE        MAE           MPE     MAPE
Training set 6.399341e-06 0.02516874 0.01110308 -0.0005603165 0.159062
                   MASE          ACF1
Training set 0.04857355 -5.962719e-05

Code
plot_util(sp_forecast_result,sp_log, 756, sp_fit)
                       ME       RMSE         MAE         MPE      MAPE
Training set 0.0003464546 0.01134116 0.007600309 0.004627061 0.1065019
                   MASE          ACF1
Training set 0.05095583 -0.0003861207

The accuracy plots and tables tell a consistent story: at short horizons our ARIMA/SARIMA models generally deliver the lowest MAE and RMSE, sitting below the mean/naïve/seasonal-naïve/drift lines in the horizon–error charts, with the advantage tapering as the horizon lengthens. For policy rates and monthly trade series, SARIMA improves on seasonal-naïve for h≤4–6 steps, while by h≈12 the gap narrows and seasonal-naïve often matches performance. For daily FX and long-maturity yield spreads, random-walk benchmarks (naïve or ARIMA(0,1,1)) remain hard to beat—our curves largely overlap theirs beyond h≈20—so the most reliable gains appear at very short horizons. For equity indices in logs, adding drift yields small but consistent MAE/RMSE improvements up to h≈20–60, after which all models converge. Across series, prediction-interval coverage is close to nominal at short horizons and widens appropriately with h, reinforcing the visual impression from the graphs that uncertainty grows as we look further ahead.

cross validation

utility function & set up

Imports: Seasonal CV (1-step vs 12-step(s)) :ARIMA(0,1,2)(0,1,1)[12]
Horizon MAE RMSE MASE1 MASEs sMAPE
1-step 248073.4 346362.3 0.8823 0.4859 7.1959
12-step 398896.5 550166.5 1.4187 0.7813 12.5347

Exports — Horizon-wise errors (1..12)
Horizon MAE_base MAE_alt RMSE_base RMSE_alt
1 349755.2 402025.6 479469.1 531600.0
2 377151.4 450233.5 518060.6 578819.0
3 414306.1 468163.6 550325.8 594720.2
4 453473.0 499933.4 598319.6 638295.1
5 468639.7 511379.7 625379.1 668905.6
6 502155.0 534119.7 648131.5 688184.0
7 536786.1 586430.9 700079.8 749036.9
8 553018.4 606789.1 724952.0 787517.8
9 566518.6 620996.3 728344.1 794421.8
10 593882.3 648108.8 756353.3 825830.1
11 596660.3 642403.4 770452.3 829190.8
12 629585.7 667397.6 803387.3 855657.0

Seoul HPI — Horizon-wise errors (1..12)
Horizon MAE_base MAE_alt RMSE_base RMSE_alt
1 0.1528 0.1523 0.2306 0.2284
2 0.3521 0.3561 0.5325 0.5366
3 0.5929 0.6014 0.9048 0.9151
4 0.8617 0.8690 1.3228 1.3345
5 1.1358 1.1394 1.7596 1.7747
6 1.4021 1.4107 2.1955 2.2111
7 1.6656 1.6752 2.6253 2.6352
8 1.9212 1.9358 3.0446 3.0414
9 2.1617 2.1837 3.4433 3.4267
10 2.4013 2.4380 3.8220 3.7890
11 2.6343 2.6801 4.1808 4.1258
12 2.8631 2.9175 4.5234 4.4435

For cross-validation, we considered two cases. First, for trade imports, both the model-selection algorithm and auto.arima() chose the same model. Its performance is consistent: as the forecast horizon increases, MAE and RMSE also increase. Second, for Seoul housing prices and trade exports, the two methods selected different models, so we compared their performance directly. Across these comparisons, the model chosen by the model-selection algorithm outperforms the auto.arima() model, yielding lower MAE and RMSE.