Univariate TS Models(ARIMA/SARIMA)

Author

zoo un park

note

In the EDA part, there is stationary observed from ACF and PACF plot except seoul housing index. Also ADF test(after differencing) confirms this with significant low p-value. To check further specifically seoul housing index, first part of this section is performing second order differencing.

package needed

Code

library(fpp3)  
library(feasts) 
library(tsibble)
library(tidyverse)
library(lubridate)
library(zoo)       
library(forecast)   
library(tseries)   
library(patchwork) 
library(here)
library(plotly)
library(xts)
library(dplyr)
library(ggplot2)
library(ggfortify)
library(knitr)
library(kableExtra)
library(astsa)
library(fpp2)
# calling data

#KR base rate
bok<- read.csv("../data/interest/bok.csv")%>%
  rename(Date = date)
bok$Date<- as.Date(bok$Date)
bok <- bok %>%
  mutate(Date = floor_date(Date, unit = "quarter")) %>%
  group_by(Date) %>%
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)), .groups = "drop")


# FED rate 

usr<- read.csv("../data/interest/us_rate.csv")%>%
  rename(Date = observation_date,rate = DFF)
usr$Date<- as.Date(usr$Date)
usr<- usr %>%
  mutate(Date = floor_date(Date, unit = "quarter")) %>%
  group_by(Date) %>%
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)), .groups = "drop")

# KRW/USD rate

kr <- read.csv("../data/fx rate/kor.csv")
kr <- kr %>%
  transmute(
    Date = as.Date(observation_date),
    krw  = DEXKOUS,    
    usd  = 1 / DEXKOUS  
  ) %>%
  filter(!is.na(Date)) %>%
  arrange(Date) %>%
  distinct(Date, .keep_all = TRUE) %>%
  complete(Date = seq(min(Date), max(Date), by = "day")) %>%

  fill(krw, usd, .direction = "down")


# USD index 

usd <- read_csv("../data/fx rate/usd_index.csv", show_col_types = FALSE)
usd <- usd %>%
  transmute(
    Date = as.Date(Date),
    usd_index = Close
  ) %>%
  filter(!is.na(Date)) %>%
  arrange(Date) %>%
  distinct(Date, .keep_all = TRUE) %>%
  complete(Date = seq(min(Date), max(Date), by = "day")) %>%
  fill(usd_index, .direction = "down")



# Seoul housing data

housing <- read_csv(here("data/housing/house.csv")) %>%
  mutate(Date = as.Date(Date))

housing <- housing %>% mutate(Date = as.Date(Date))


# Yield data 

us_yield  <- read.csv(here("data/yield", "us_yield.csv"))
kor_yield   <- read.csv(here("data/yield", "kor_yield.csv"))


us_yield$Date  <- as.Date(us_yield$Date)
kor_yield$Date <- as.Date(kor_yield$Date)

colnames(us_yield)[colnames(us_yield)  == "X3Y"]  <- "US_3Y"
colnames(us_yield)[colnames(us_yield)  == "X10Y"] <- "US_10Y"
colnames(kor_yield)[colnames(kor_yield) == "X3Y"]  <- "KR_3Y"
colnames(kor_yield)[colnames(kor_yield) == "X10Y"] <- "KR_10Y"

us_yield  <- us_yield[,  c("Date", "US_3Y", "US_10Y")]
kor_yield <- kor_yield[, c("Date", "KR_3Y", "KR_10Y")]

us_yield  <- us_yield  %>% arrange(Date) %>% fill(US_3Y, US_10Y, .direction = "down")
kor_yield <- kor_yield %>% arrange(Date) %>% fill(KR_3Y, KR_10Y, .direction = "down")


df <- merge(us_yield, kor_yield, by = "Date")

df$US_3m10  <- df$US_3Y - df$US_10Y
df$KR_3m10  <- df$KR_3Y - df$KR_10Y
df$US_KR_3Y <- df$US_3Y - df$KR_3Y
df$US_KR_10Y<- df$US_10Y - df$KR_10Y


#import & export data 

imports <- read_csv(here::here("data/trade", "imports.csv"))
exports <- read_csv(here::here("data/trade", "exports.csv"))

clean_date <- function(x) {
  as.Date(paste0(sub("^([0-9]{4})([0-9]{2})$", "\\1-\\2", gsub("/", "-", as.character(x))), "-01"))
}

imports <- imports %>%
  mutate(Date = clean_date(Date),
         Type = "Imports")

exports <- exports %>%
  mutate(Date = clean_date(Date),
         Type = "Exports")
trade <- bind_rows(imports, exports)

names(trade)[2] <- "Value"

trade_ts <- trade %>%
  as_tsibble(index = Date, key = Type)


# S&P500 data
sp500 <- read_csv("../data/stock/sp500.csv")


# KOSPI data
kospi <- read_csv("../data/stock/kospi.csv")
kospi$Date <- as.Date(kospi$Date)

Differencing

Code

bok_ts <- ts(bok$base_rate, start = c(year(min(bok$Date)), month(min(bok$Date))), end = c(year(max(bok$Date)), month(max(bok$Date))), frequency=4)
bok_diff<- diff(bok_ts,differences = 2)
ggAcf(bok_diff) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of South Korea base rate") +
  theme_bw()

Code

ggPacf(bok_diff) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of South Korea base rate") +
  theme_bw()

Code

usr_ts <- ts(usr$rate, start = c(year(min(usr$Date)), month(min(usr$Date))), end = c(year(max(usr$Date)), month(max(usr$Date))),frequency= 4)
usr_diff<- diff(usr_ts,differences = 2)

ggAcf(usr_diff) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of Federal Reserve Rate ") +
  theme_bw()

Code

ggPacf(usr_diff) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of Federal Reserve Rate ") +
  theme_bw()

Code

kr_ts<- ts(kr$usd, frequency = 252, start = c(year(min(kr$Date)), month(min(kr$Date))), end = c(year(max(kr$Date)), month(max(kr$Date))))
kr_diff<- diff(kr_ts,differences = 2)

ggAcf(kr_diff,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of KRW/USD FX rate") +
  theme_bw()

Code

ggPacf(kr_diff,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of  KRW/USD FX rate") +
  theme_bw()

Code

import_ts<- ts(imports$USD, frequency = 12, start = c(year(min(imports$Date)), month(min(imports$Date))), end = c(year(max(imports$Date)), month(max(imports$Date))))
import_diff <- diff(import_ts,differences = 2)

ggAcf(import_diff) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of trade import South Korea vs USA") +
  theme_bw()

Code

ggPacf(import_diff) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of trade import South Korea vs USA") +
  theme_bw()

Code

export_ts<- ts(exports$USD, frequency = 12, start = c(year(min(exports$Date)), month(min(exports$Date))), end = c(year(max(exports$Date)), month(max(exports$Date))))
export_diff <- diff(export_ts,differences = 2)

ggAcf(export_diff) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of trade export South Korea vs USA") +
  theme_bw()

Code

ggPacf(export_diff) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of trade export South Korea vs USA") +
  theme_bw()

Code

yield_3y_ts<- ts(df$US_KR_3Y, frequency = 252, start = c(year(min(df$Date)), month(min(df$Date))), end = c(year(max(df$Date)), month(max(df$Date))))
yield_3y_ts_diff <- diff(yield_3y_ts,differences = 2)

ggAcf(yield_3y_ts_diff,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of USA & South Korea yield spread rate(3Y)") +
  theme_bw()

Code

ggPacf(yield_3y_ts_diff,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of USA & South Korea yield spread rate(3Y)") +
  theme_bw()

Code

yield_10y_ts<- ts(df$US_KR_10Y, frequency = 252, start = c(year(min(df$Date)), month(min(df$Date))), end = c(year(max(df$Date)), month(max(df$Date))))
yield_10y_ts_diff <- diff(yield_10y_ts,differences = 2)

ggAcf(yield_10y_ts_diff,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of USA & South Korea yield spread rate(10Y)") +
  theme_bw()

Code

ggPacf(yield_10y_ts_diff,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of USA & South Korea yield spread rate(10Y)") +
  theme_bw()

Code

house_ts<- ts(housing$housing, frequency = 12, start = c(year(min(housing$Date)), month(min(housing$Date))), end = c(year(max(housing$Date)), month(max(housing$Date))))
house_dlog <- diff(house_ts,differences = 2)

ggAcf(house_dlog) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of Seoul housing  property index ") +
  theme_bw()

Code

ggPacf(house_dlog) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of Seoul housing  property index ") +
  theme_bw()

Code

adf.test(house_dlog)


    Augmented Dickey-Fuller Test

data:  house_dlog
Dickey-Fuller = -7.8107, Lag order = 6, p-value = 0.01
alternative hypothesis: stationary

Code

usd_ts <- ts(usd$usd_index, start = c(year(min(usd$Date)), month(min(usd$Date))), end = c(year(max(usd$Date)), month(max(usd$Date))), frequency = 252)
usd_log  <- log(usd_ts)
usd_dlog <- diff(usd_log,diff = 2) 

ggAcf(usd_dlog,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of USD index") +
  theme_bw()

Code

ggPacf(usd_dlog,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of USD index") +
  theme_bw()

Code

kospi_ts <- ts(kospi$Close, start = c(year(min(kospi$Date)), month(min(kospi$Date))), end = c(year(max(kospi$Date)), month(max(kospi$Date))), frequency = 250)
kospi_log  <- log(kospi_ts)
kospi_dlog <- diff(kospi_log,differences = 2) 

ggAcf(kospi_dlog,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of KOSPI index") +
  theme_bw()

Code

ggPacf(kospi_dlog,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(2): PACF of KOSPI index") +
  theme_bw()

Code

sp_ts <- ts(sp500$Close, start = c(year(min(sp500$Date)), month(min(sp500$Date))), end = c(year(max(sp500$Date)), month(max(sp500$Date))), frequency = 252)
sp_log  <- log(sp_ts)
sp_dlog <- diff(sp_log,differences = 2) 

ggAcf(sp_dlog,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(2): ACF of S&P500 index") +
  theme_bw()

Code

ggPacf(sp_dlog,lag.max = 50) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced (2): PACF of S&P500 index") +
  theme_bw()

In the previous EDA section, we examined ACF & PACF plot before and after differencing. before starting univariate time series analysis, it is important whether differenced result is satisfied for choosing parameter for univariate time series analysis.

Table above is a ACF & PACF plot for differencing, but this time, using second order differencing. In the EDA section, seoul housing index was still suspected non-stationary after first differencing, and in the second differencing,it seems it gives different result. The ACF shows small correlations scattered throughout the lags, with most staying within the confidence bounds. There are some noticeable spikes at lags around 10, 12-13, 16, 18, and 21, but these are relatively modest in magnitude (mostly under 0.15). The overall pattern shows no strong persistence or decay structure. The PACF displays a similar pattern with scattered small spikes, with a notable negative spike at lag 12 (approximately -0.20) and positive spikes at lags 16 and 18. Most other lags remain within the confidence bounds.So second-order differenced series appears to be stationary.

although ADF test result and ACF & PACF plot tells first order differencing(for some variables, log transformation was also done) is enough,except seoul housing index, to make firm decision on parameters, this table has been added. from the plots, it seems that taking second order differencing doesn’t improve significantly for other variables, so for univariate time series, first order differencing will be used for determining model parameters.

Based on the result, next step is to find model selection and its parameter. Table below are the parameters will be used for search algorithm for each data.

Model choices

Model & Parameter selection (USD, KOSPI, and S&P500 are log-transformed before differencing.)
Series	Model parameters
South Korea base rate	ARIMA p = 0,1 d = 1 ,q = 0,1
FED effective rate	ARIMA p = 0,1,2 , d = 0,1 , q = 0,1,2
KRW/USD FX rate	ARIMA p = 0,1 d = 0,1 q = 0,1
Yield spread 3Y (US–KR)	ARIMA p = 0,1 d = 1 q = 0,1
Yield spread 10Y (US–KR)	ARIMA p = 0,1 d = 1 q = 0,1
USD index (log)	ARIMA p = 0,1 d = 1, , q = 0,1
KOSPI index (log)	ARIMA p = 0,1 d = 1,q = 0,1,2
S&P 500 index (log)	ARIMA p = 0,1 d = 0,1, q = 0,1
trade export KOR - USA	SARIMA p = 0,1,d = 0,1 q = 0,1, P = 0,1,2,3 D = 0,1,Q = 0,1 , s= 12
trade import KOR -USA	SARIMA p = 0,1,d = 0,1 q = 0,1, P = 0,1,2 D = 0,1,Q = 0,1, s = 12
Seoul housing property index	SARIMA p =0,1 ,d = 0,1,2 q = 0,1,2 P = 0,1, D = 0,1 ,Q = 0,1,2 ,s = 12

Model selection

ARIMA utility function

Code

library(kableExtra)

arima_grid <- function(ts_obj,
                           p_set, d_set, q_set,
                           caption1 = NULL,
                           caption2 = "Comparison of ARIMA Models") {
  ARIMA_fit <- list()
  
  n <- length(p_set) * length(d_set) * length(q_set)

  results_matrix <- matrix(NA_real_, nrow = n, ncol = 6)
  
  cc <- 1
  
  for (d in d_set) {
    for (p in p_set) {
      for (q in q_set) {
        model <- Arima(ts_obj, order = c(p, d, q), include.drift = (d > 0))
        ARIMA_fit[[cc]] <- model
        results_matrix[cc, ] <- c(p, d, q, model$aic, model$bic, model$aicc)
        cc <- cc + 1
      }
    }
  }
  
  results_df <- as.data.frame(results_matrix)
  colnames(results_df) <- c("p","d","q","AIC","BIC","AICc")
  

  highlight_row <- which.min(results_df$AIC)
  
cap <- if (!is.null(caption1) && nzchar(caption1)) {
    sprintf("%s : %s", caption1, caption2)
  } else {
    caption
  }
  result_table<-knitr::kable(results_df, align = 'c', caption = cap) %>%
    kable_styling(full_width = FALSE, position = "center") %>%
  row_spec(highlight_row, bold = TRUE, background = "#FFFF99")
  
  list(
    table = result_table,
    best_model = ARIMA_fit[[highlight_row]],
    best_row = results_df[highlight_row, , drop = FALSE]
  )
  

}

Code

fit_kor_base <- arima_grid(
  ts_obj = kr_ts,
  p_set = c(0,1), d_set = c(1), q_set = c(0,1),
  caption1 = "South Korea Base Rate "
)
fit_kor_base$table

South Korea Base Rate : Comparison of ARIMA Models
p	d	q	AIC	BIC	AICc
0	1	0	-241191.8	-241177.2	-241191.8
0	1	1	-241192.7	-241170.7	-241192.7
1	1	0	-241192.6	-241170.7	-241192.6
1	1	1	-241190.7	-241161.4	-241190.7

Code

print(fit_kor_base$best_model)

Series: ts_obj 
ARIMA(0,1,1) with drift 

Coefficients:
         ma1  drift
      0.0161      0
s.e.  0.0095      0

sigma^2 = 2.116e-11:  log likelihood = 120599.3
AIC=-241192.7   AICc=-241192.7   BIC=-241170.7

Model : ARIMA(0,1,1)

\[ Equation: x_t = 1.0000x_{t-1} + w_t + 0.0161w_{t-1} \]

Code

fit_usr <- arima_grid(
  ts_obj = usr_ts,
  p_set = c(0,1), d_set = c(1), q_set = c(0,1,2),
  caption1 = "FED rate"
)
fit_usr$table

FED rate : Comparison of ARIMA Models
p	d	q	AIC	BIC	AICc
0	1	0	143.29499	148.69595	143.40714
0	1	1	97.44701	105.54845	97.67342
0	1	2	86.22130	97.02322	86.60225
1	1	0	71.56170	79.66314	71.78811
1	1	1	73.34440	84.14632	73.72535
1	1	2	74.57780	88.08020	75.15472

Code

print(fit_usr$best_model)

Series: ts_obj 
ARIMA(1,1,0) with drift 

Coefficients:
         ar1   drift
      0.6987  0.0229
s.e.  0.0674  0.1008

sigma^2 = 0.1076:  log likelihood = -32.78
AIC=71.56   AICc=71.79   BIC=79.66

Model: ARIMA(1,1,0)

\[ Equation:\; x_t = x_{t-1} + 0.6987(x_{t-1} - x_{t-2}) + 0.0229 + w_t \]

Code

fit_kr <- arima_grid(
  ts_obj = kr_ts,
  p_set = c(0,1), d_set = c(0,1), q_set = c(0,1),
  caption1 = "KRW/USD FX rate"
)
fit_kr$table

KRW/USD FX rate : Comparison of ARIMA Models
p	d	q	AIC	BIC	AICc
0	0	0	-154525.2	-154510.6	-154525.2
0	0	1	-169644.4	-169622.5	-169644.4
1	0	0	-241202.2	-241180.3	-241202.2
1	0	1	-241203.2	-241173.9	-241203.2
0	1	0	-241191.8	-241177.2	-241191.8
0	1	1	-241192.7	-241170.7	-241192.7
1	1	0	-241192.6	-241170.7	-241192.6
1	1	1	-241190.7	-241161.4	-241190.7

Code

print(fit_kr$best_model)

Series: ts_obj 
ARIMA(1,0,1) with non-zero mean 

Coefficients:
         ar1     ma1    mean
      0.9997  0.0165  0.0011
s.e.  0.0001  0.0095  0.0001

sigma^2 = 2.116e-11:  log likelihood = 120605.6
AIC=-241203.1   AICc=-241203.1   BIC=-241173.9

Model: ARIMA(1,0,1)

\[ Equation:\; x_t = 0.9997x_{t-1} + w_t + 0.0165w_{t-1} + 0.0011 \]

Code

fit_yield_3y <- arima_grid(
  ts_obj = yield_3y_ts,
  p_set = c(0,1), d_set = c(0,1), q_set = c(0,1),
  caption1 = "USA & South Korea yield spread rate(3Y)"
)
fit_yield_3y$table

USA & South Korea yield spread rate(3Y) : Comparison of ARIMA Models
p	d	q	AIC	BIC	AICc
0	0	0	21568.17	21581.66	21568.17
0	0	1	13436.22	13456.46	13436.22
1	0	0	-13412.80	-13392.55	-13412.79
1	0	1	-13481.31	-13454.32	-13481.30
0	1	0	-13411.85	-13398.35	-13411.85
0	1	1	-13481.68	-13461.44	-13481.68
1	1	0	-13481.41	-13461.17	-13481.41
1	1	1	-13479.69	-13452.69	-13479.68

Code

print(fit_yield_3y$best_model)

Series: ts_obj 
ARIMA(0,1,1) with drift 

Coefficients:
          ma1   drift
      -0.1069  -2e-04
s.e.   0.0125   9e-04

sigma^2 = 0.00688:  log likelihood = 6743.84
AIC=-13481.68   AICc=-13481.68   BIC=-13461.44

Model : ARIMA(0,1,1)

\[ Equation:\; x_t = x_{t-1} - 0.0002 + w_t - 0.1069w_{t-1} \]

Code

fit_yield_10y <- arima_grid(
  ts_obj = yield_10y_ts,
  p_set = c(0,1), d_set = c(0,1), q_set = c(0,1),
  caption1 = "USA & South Korea yield spread rate(10Y)"
)

fit_yield_10y$table

USA & South Korea yield spread rate(10Y) : Comparison of ARIMA Models
p	d	q	AIC	BIC	AICc
0	0	0	17508.529	17522.026	17508.531
0	0	1	9586.314	9606.558	9586.318
1	0	0	-13233.117	-13212.872	-13233.113
1	0	1	-13280.071	-13253.079	-13280.065
0	1	0	-13226.383	-13212.887	-13226.382
0	1	1	-13275.509	-13255.265	-13275.505
1	1	0	-13273.890	-13253.646	-13273.886
1	1	1	-13274.976	-13247.985	-13274.970

Code

print(fit_yield_10y$best_model)

Series: ts_obj 
ARIMA(1,0,1) with non-zero mean 

Coefficients:
         ar1      ma1     mean
      0.9968  -0.0896  -0.7027
s.e.  0.0010   0.0128   0.2928

sigma^2 = 0.0071:  log likelihood = 6644.04
AIC=-13280.07   AICc=-13280.06   BIC=-13253.08

Model : ARIMA(1,0,1)

\[ Equation:\; x_t = 0.9968x_{t-1} + w_t - 0.0896w_{t-1} - 0.7027 \]

Code

fit_usd_index <- arima_grid(
  ts_obj = usd_log,
  p_set = c(0,1), d_set = c(0,1), q_set = c(0,1),
  caption1 = "USD Index (log)"
)
fit_usd_index$table

USD Index (log) : Comparison of ARIMA Models
p	d	q	AIC	BIC	AICc
0	0	0	-8748.041	-8733.412	-8748.04
0	0	1	-23691.860	-23669.917	-23691.86
1	0	0	-87836.726	-87814.783	-87836.72
1	0	1	-87835.031	-87805.774	-87835.03
0	1	0	-87836.611	-87821.982	-87836.61
0	1	1	-87834.945	-87813.003	-87834.94
1	1	0	-87834.959	-87813.016	-87834.96
1	1	1	-87832.953	-87803.696	-87832.95

Code

print(fit_usd_index$best_model)

Series: ts_obj 
ARIMA(1,0,0) with non-zero mean 

Coefficients:
         ar1    mean
      0.9997  4.5713
s.e.  0.0003  0.1323

sigma^2 = 2.132e-05:  log likelihood = 43921.36
AIC=-87836.73   AICc=-87836.72   BIC=-87814.78

Model: ARIMA(1,0,0)

\[ Equation:\; x_t = 0.9997x_{t-1} + 4.5713 + w_t \]

Code

fit_sp500 <- arima_grid(
  ts_obj = sp_log,
  p_set = c(0,1), d_set = c(0,1), q_set = c(0,1),
  caption1 = "S&P 500 Index (log)"
)
fit_sp500$table

S&P 500 Index (log) : Comparison of ARIMA Models
p	d	q	AIC	BIC	AICc
0	0	0	19265.665	19279.837	19265.667
0	0	1	7232.505	7253.763	7232.508
1	0	0	-53976.415	-53955.158	-53976.413
1	0	1	-54038.354	-54010.010	-54038.349
0	1	0	-53988.748	-53974.576	-53988.747
0	1	1	-54042.503	-54021.245	-54042.500
1	1	0	-54041.343	-54020.086	-54041.341
1	1	1	-54041.223	-54012.879	-54041.218

Code

print(fit_sp500$best_model)

Series: ts_obj 
ARIMA(0,1,1) with drift 

Coefficients:
          ma1  drift
      -0.0802  3e-04
s.e.   0.0107  1e-04

sigma^2 = 0.0001285:  log likelihood = 27024.25
AIC=-54042.5   AICc=-54042.5   BIC=-54021.25

Model: ARIMA(0,1,1)

\[ Equation:\; x_t = x_{t-1} + 0.0003 + w_t - 0.0802w_{t-1} \]

Code

fit_kospi <- arima_grid(
  ts_obj = kospi_log,
  p_set = c(0,1), d_set = c(0,1), q_set = c(0,1,2),
  caption1 = "KOSPI Index (log)"
)

fit_kospi$table

KOSPI Index (log) : Comparison of ARIMA Models
p	d	q	AIC	BIC	AICc
0	0	0	12326.432	12340.209	12326.434
0	0	1	2713.885	2734.551	2713.888
0	0	2	-5173.461	-5145.906	-5173.455
1	0	0	-32793.924	-32773.258	-32793.921
1	0	1	-32796.920	-32769.365	-32796.914
1	0	2	-32798.031	-32763.588	-32798.023
0	1	0	-32794.538	-32780.761	-32794.536
0	1	1	-32797.341	-32776.676	-32797.338
0	1	2	-32798.614	-32771.060	-32798.609
1	1	0	-32797.136	-32776.470	-32797.132
1	1	1	-32797.872	-32770.318	-32797.866
1	1	2	-32796.608	-32762.166	-32796.600

Code

print(fit_kospi$best_model)

Series: ts_obj 
ARIMA(0,1,2) with drift 

Coefficients:
         ma1      ma2  drift
      0.0258  -0.0214  0e+00
s.e.  0.0117   0.0118  3e-04

sigma^2 = 0.0006338:  log likelihood = 16403.31
AIC=-32798.61   AICc=-32798.61   BIC=-32771.06

Model : ARIMA(0,1,2)

\[ Equation:\; x_t = x_{t-1} + 0.0000 + w_t + 0.0258w_{t-1} - 0.0214w_{t-2} \]

SARIMA Model

ACF & PACF plot for seasonal data

Code

import_diff_seasonal <- diff(import_ts,lag = 12)

ggAcf(import_diff_seasonal) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(seasonal): ACF of trade import South Korea vs USA") +
  theme_bw()

Code

ggPacf(import_diff_seasonal) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(seasonal): PACF of trade import South Korea vs USA") +
  theme_bw()

Code

export_diff_seasonal <- diff(export_ts,lag = 12)

ggAcf(export_diff_seasonal) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(seasonal): ACF of trade export South Korea vs USA") +
  theme_bw()

Code

ggPacf(export_diff_seasonal) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(seasonal): PACF of trade export South Korea vs USA") +
  theme_bw()

Code

house_diff_seasonal <- diff(house_ts,lag = 12)

ggAcf(house_diff_seasonal) +
  geom_segment(aes(xend = lag, yend = 0), color="blue") +
  labs(title = "Differenced(seasonal): ACF of Seoul housing  property index ") +
  theme_bw()

Code

ggPacf(house_diff_seasonal) +
  geom_segment(aes(xend = lag, yend = 0), color="red") +
  labs(title = "Differenced(seasonal): PACF of Seoul housing  property index ") +
  theme_bw()

SARIMA utility function

Code

library(kableExtra)

sarima_grid <- function(ts_obj,
                            p_set, d_set, q_set,
                            P_set, D_set, Q_set, s,
                            caption_prefix = NULL,
                            caption_suffix = "Comparison of SARIMA Model"){
  SARIMA_fit <- list()
  n <- length(p_set) * length(d_set) * length(q_set) *
       length(P_set) * length(D_set) * length(Q_set)
  results_matrix <- matrix(NA_real_, nrow = n, ncol = 9) 
  cc <- 1


  for (d in d_set) {
    for (p in p_set) {
      for (q in q_set) {
        for (P in P_set) {
          for (D in D_set) {
            for (Q in Q_set) {
              model <- Arima(ts_obj,
                             order   = c(p, d, q),
                             seasonal = list(order = c(P, D, Q), period = s),
                              method = "ML",             
          transform.pars = TRUE )
              SARIMA_fit[[cc]] <- model
              results_matrix[cc, ] <- c(p, d, q, P, D, Q, model$aic, model$bic, model$aicc)
              cc <- cc + 1
            }
          }
        }
      }
    }
  }

  results_df <- as.data.frame(results_matrix)
  colnames(results_df) <- c("p","d","q","P","D","Q","AIC","BIC","AICc")
  highlight_row <- which.min(results_df$AIC)

  cap <- if (!is.null(caption_prefix) && nzchar(caption_prefix)) {
    sprintf("%s : %s", caption_prefix, caption_suffix)
  } else {
    caption_suffix
  }

  result_table1 <-knitr::kable(results_df, align = 'c', caption = cap) %>%
  kableExtra::kable_styling(full_width = FALSE, position = "center") %>%
  kableExtra::row_spec(highlight_row, bold = TRUE, background = "#FFFF99")
  

 list(
    table = result_table1,
    best_model = SARIMA_fit[[highlight_row]],
    best_row = results_df[highlight_row, , drop = FALSE]
  )
  

}

Code

fit_import <- sarima_grid(
  ts_obj = import_ts,
  p_set = c(0,1), d_set = c(1), q_set = c(0,1,2),
  P_set = c(0,1), D_set = c(1), Q_set = c(0,1), s = 12,
  caption_prefix = "Trade import South korea vs USA "
)
fit_import$table

Trade import South korea vs USA : Comparison of SARIMA Model
p	d	q	P	D	Q	AIC	BIC	AICc
0	1	0	0	1	0	11045.46	11049.40	11045.47
0	1	0	0	1	1	10847.43	10855.30	10847.46
0	1	0	1	1	0	10970.25	10978.13	10970.29
0	1	0	1	1	1	10849.13	10860.94	10849.19
0	1	1	0	1	0	10973.83	10981.71	10973.86
0	1	1	0	1	1	10755.51	10767.32	10755.57
0	1	1	1	1	0	10882.42	10894.24	10882.49
0	1	1	1	1	1	10757.16	10772.91	10757.27
0	1	2	0	1	0	10973.53	10985.34	10973.59
0	1	2	0	1	1	10752.03	10767.78	10752.13
0	1	2	1	1	0	10871.81	10887.56	10871.91
0	1	2	1	1	1	10753.11	10772.79	10753.27
1	1	0	0	1	0	10978.12	10985.99	10978.15
1	1	0	0	1	1	10760.13	10771.94	10760.19
1	1	0	1	1	0	10880.65	10892.46	10880.71
1	1	0	1	1	1	10761.37	10777.12	10761.47
1	1	1	0	1	0	10973.73	10985.54	10973.80
1	1	1	0	1	1	10752.62	10768.37	10752.72
1	1	1	1	1	0	10875.74	10891.49	10875.84
1	1	1	1	1	1	10753.78	10773.47	10753.94
1	1	2	0	1	0	10975.53	10991.28	10975.64
1	1	2	0	1	1	10753.95	10773.63	10754.11
1	1	2	1	1	0	10873.77	10893.46	10873.93
1	1	2	1	1	1	10754.99	10778.61	10755.21

Code

print(fit_import$best_model)

Series: ts_obj 
ARIMA(0,1,2)(0,1,1)[12] 

Coefficients:
          ma1     ma2     sma1
      -0.5291  0.1363  -0.9351
s.e.   0.0496  0.0574   0.0363

sigma^2 = 1.132e+11:  log likelihood = -5372.01
AIC=10752.03   AICc=10752.13   BIC=10767.78

Model : SARIMA(0,1,2)(0,1,1)[12]

\[ Equation:\; (1 - B)(1 - B^{12})x_t = (1 - 0.5291B + 0.1363B^2)(1 - 0.9351B^{12})w_t \]

Code

fit_export<- sarima_grid(
  ts_obj = export_ts,
  p_set = c(0), d_set = c(0,1), q_set = c(0,1,2,3),
  P_set = c(0), D_set = c(0,1), Q_set = c(0,1), s = 12,
  caption_prefix = " Trade export South korea vs USA  "
)
fit_export$table

Trade export South korea vs USA : Comparison of SARIMA Model
d	q	D	Q	AIC	BIC	AICc
0	0	0	0	12668.68	12676.62	12668.71
0	0	0	1	12304.24	12316.16	12304.31
0	0	1	0	11369.13	11373.07	11369.14
0	0	1	1	11371.12	11379.00	11371.15
0	1	0	0	12260.71	12272.62	12260.77
0	1	0	1	11958.80	11974.69	11958.90
0	1	1	0	11231.46	11239.34	11231.50
0	1	1	1	11226.34	11238.16	11226.40
0	2	0	0	12006.81	12022.70	12006.92
0	2	0	1	11812.77	11832.62	11812.92
0	2	1	0	11182.90	11194.72	11182.97
0	2	1	1	11168.91	11184.67	11169.01
0	3	0	0	11890.94	11910.79	11891.09
0	3	0	1	11712.12	11735.95	11712.34
0	3	1	0	11141.07	11156.83	11141.17
0	3	1	1	11102.09	11121.79	11102.25
1	0	0	0	11451.98	11455.95	11451.99
1	0	0	1	11397.51	11405.44	11397.54
1	0	1	0	11150.77	11154.71	11150.78
1	0	1	1	10997.66	11005.54	10997.70
1	1	0	0	11350.69	11358.63	11350.72
1	1	0	1	11302.46	11314.37	11302.52
1	1	1	0	11062.72	11070.60	11062.75
1	1	1	1	10897.91	10909.72	10897.97
1	2	0	0	11348.22	11360.13	11348.28
1	2	0	1	11302.08	11317.95	11302.18
1	2	1	0	11064.72	11076.53	11064.78
1	2	1	1	10899.82	10915.57	10899.93
1	3	0	0	11340.78	11356.65	11340.88
1	3	0	1	11297.90	11317.74	11298.05
1	3	1	0	11065.89	11081.64	11065.99
1	3	1	1	10901.50	10921.19	10901.66

Code

print(fit_export$best_model)

Series: ts_obj 
ARIMA(0,1,1)(0,1,1)[12] 

Coefficients:
          ma1     sma1
      -0.5536  -0.8441
s.e.   0.0431   0.0329

sigma^2 = 1.711e+11:  log likelihood = -5445.96
AIC=10897.91   AICc=10897.97   BIC=10909.72

Model : SARIMA(0,1,1)(0,1,1)[12]

\[ Equation:\; (1 - B)(1 - B^{12})x_t = (1 - 0.5536B)(1 - 0.8441B^{12})w_t \]

Code

fit_housing <- sarima_grid(
  ts_obj = house_ts,
  p_set = c(0,1), d_set = c(1), q_set = c(0,1,2),
  P_set = c(0,1), D_set = c(1), Q_set = c(0,1,2), s = 12,
  caption_prefix = "seoul housing property index"
)
fit_housing$table

seoul housing property index : Comparison of SARIMA Model
p	d	q	P	D	Q	AIC	BIC	AICc
0	1	0	0	1	0	573.71724	577.2307	573.73350
0	1	0	0	1	1	462.76877	469.7956	462.81775
0	1	0	0	1	2	459.44148	469.9818	459.53984
0	1	0	1	1	0	546.33627	553.3631	546.38525
0	1	0	1	1	1	461.20621	471.7465	461.30457
0	1	0	1	1	2	457.73367	471.7874	457.89828
0	1	1	0	1	0	368.36573	375.3926	368.41471
0	1	1	0	1	1	260.21272	270.7530	260.31108
0	1	1	0	1	2	256.58723	270.6409	256.75184
0	1	1	1	1	0	341.54235	352.0826	341.64071
0	1	1	1	1	1	258.35713	272.4108	258.52174
0	1	1	1	1	2	255.67088	273.2380	255.91881
0	1	2	0	1	0	283.99217	294.5325	284.09053
0	1	2	0	1	1	166.55946	180.6132	166.72406
0	1	2	0	1	2	166.16369	183.7308	166.41162
0	1	2	1	1	0	247.83302	261.8867	247.99763
0	1	2	1	1	1	166.73823	184.3054	166.98616
0	1	2	1	1	2	166.66612	187.7467	167.01466
1	1	0	0	1	0	225.98778	233.0146	226.03676
1	1	0	0	1	1	97.72783	108.2681	97.82619
1	1	0	0	1	2	98.90601	112.9597	99.07062
1	1	0	1	1	0	184.13540	194.6757	184.23376
1	1	0	1	1	1	99.11640	113.1701	99.28101
1	1	0	1	1	2	98.96439	116.5315	99.21232
1	1	1	0	1	0	210.49837	221.0387	210.59673
1	1	1	0	1	1	87.26888	101.3226	87.43349
1	1	1	0	1	2	87.86554	105.4327	88.11348
1	1	1	1	1	0	170.61942	184.6731	170.78403
1	1	1	1	1	1	88.20233	105.7695	88.45027
1	1	1	1	1	2	88.56451	109.6451	88.91306
1	1	2	0	1	0	207.31033	221.3640	207.47494
1	1	2	0	1	1	84.22789	101.7950	84.47582
1	1	2	0	1	2	84.72039	105.8010	85.06893
1	1	2	1	1	0	167.49651	185.0637	167.74445
1	1	2	1	1	1	85.07630	106.1569	85.42485
1	1	2	1	1	2	85.51716	110.1112	85.98383

Code

print(fit_housing$best_model)

Series: ts_obj 
ARIMA(1,1,2)(0,1,1)[12] 

Coefficients:
         ar1     ma1     ma2     sma1
      0.7799  0.2868  0.1734  -0.9998
s.e.  0.0508  0.0727  0.0752   0.1302

sigma^2 = 0.06895:  log likelihood = -37.11
AIC=84.23   AICc=84.48   BIC=101.8

Model : SARIMA(1,1,2)(0,1,1)[12]

\[ Equation:\; (1 - 0.7799B)(1 - B)(1 - B^{12})x_t = (1 + 0.2868B + 0.1734B^2)(1 - 0.9998B^{12})w_t \]

Model choice(with equation)

South korea base rate

Model : ARIMA(0,1,1)

\[ Equation: x_t = 1.0000x_{t-1} + w_t + 0.0161w_{t-1} \]

Federal reserve rate

Model: ARIMA(1,1,0)

\[ Equation:\; x_t = x_{t-1} + 0.6987(x_{t-1} - x_{t-2}) + 0.0229 + w_t \]

KRW/USD FX rate

Model: ARIMA(1,0,1)

\[ Equation:\; x_t = 0.9997x_{t-1} + w_t + 0.0165w_{t-1} + 0.0011 \]

USA & South Korea yield spread rate(10Y)

Model : ARIMA(1,0,1)

\[ Equation:\; x_t = 0.9968x_{t-1} + w_t - 0.0896w_{t-1} - 0.7027 \]

USD index

Model: ARIMA(1,0,0)

\[ Equation:\; x_t = 0.9997x_{t-1} + 4.5713 + w_t \]

S&P500 index

Model: ARIMA(0,1,1)

\[ Equation:\; x_t = x_{t-1} + 0.0003 + w_t - 0.0802w_{t-1} \]

KOSPI index

Model : ARIMA(0,1,2)

\[ Equation:\; x_t = x_{t-1} + 0.0000 + w_t + 0.0258w_{t-1} - 0.0214w_{t-2} \]

Trade import South korea vs USA Model

SARIMA(0,1,2)(0,1,1)[12]

\[ Equation:\; (1 - B)(1 - B^{12})x_t = (1 - 0.5291B + 0.1363B^2)(1 - 0.9351B^{12})w_t \]

Trade import South korea vs USA

Model : SARIMA(0,1,1)(0,1,1)[12]

\[ Model : SARIMA(0,1,1)(0,1,1)[12] \ Equation:\; (1 - B)(1 - B^{12})x_t = (1 - 0.5536B)(1 - 0.8441B^{12})w_t \]

Seoul housing property index

Model : SARIMA(1,1,2)(0,1,1)[12]

\[ Equation:\; (1 - 0.7799B)(1 - B)(1 - B^{12})x_t = (1 + 0.2868B + 0.1734B^2)(1 - 0.9998B^{12})w_t \]

Model diagnostics

Code

bok_model<- capture.output(sarima(bok_ts, 0,1,1))

The residual diagnostics for the ARIMA(0,1,1) model indicate a fairly good fit. The standardized residuals fluctuate randomly around zero, suggesting the mean of the residuals is stable over time. However there are outlier times for example around 2019 and 2025 Q1. The ACF plot shows no significant spikes beyond the 95% confidence limits, implying that the residuals are uncorrelated and behave like white noise. The Q–Q plot of standardized residuals is mostly linear, though with slight deviations at the tails, indicating mild non-normality. The Ljung–Box test p-values remain above 0.05 across lags, confirming that there is no significant autocorrelation left in the residuals.

Code

usr_model<- capture.output(sarima(usr_ts, 1, 1, 0))

The residuals from the ARIMA(1,1,0) model are well-behaved and largely centered around zero, with only a few spikes visible around 2009 and 2020. The ACF plot reveals no substantial autocorrelation at any lag, supporting the conclusion that the residuals are random and independent. The Q–Q plot demonstrates a near-linear relationship between sample and theoretical quantiles, except for small departures at both ends of the distribution, which suggests slight heavy-tailed behavior. The Ljung–Box p-values mostly exceed 0.05, indicating the residuals are effectively white noise. This model performs well and adequately represents the time series dynamics, with minimal room for improvement.

Code

kr_model<- capture.output(sarima(kr_ts, 1,0,1))

For the ARMA(1,0,1) model, the residuals fluctuate around zero but exhibit slightly higher volatility than desired, particularly during certain time intervals. The ACF of the residuals shows weak but visible autocorrelation at the first few lags. The Q–Q plot displays substantial deviation from the 45-degree line in the tails, implying the presence of heavy tails and possible outliers, showing non-linearity. The Ljung–Box test results show a few p-values below the 0.05 threshold, which further indicates some remaining structure in the residuals. In summary, while the model fits decently, it may not fully capture the short-term autocorrelation and could benefit from a slightly more complex specification.

Code

import_model<- capture.output(sarima(import_ts, 0,1,2,0,1,1,12))

The SARIMA(0,1,2)×(0,1,1)[12] model shows residuals that are roughly centered around zero, though some mild volatility clustering is visible toward the end of the series. The ACF plot exhibits small but notable spikes at seasonal lags. The Q–Q plot suggests that the residuals in heavier tails at the extremes, shows non-linearity. The Ljung–Box test indicates that many p-values are below 0.05, suggesting the presence of lingering autocorrelation, indicating it is not fully capturing underlying structure of time series .

Code

export_model<- capture.output(sarima(export_ts, 0,1,1,0,1,1,12))

The SARIMA(0,1,1)×(0,1,1)[12] model shows residuals that are roughly centered around zero, though some mild volatility clustering is visible toward the end of the series. The ACF plot exhibits small but notable spikes at seasonal lags. The Q–Q plot suggests that the residuals in heavier tails at the extremes, shows non-linearity. The Ljung–Box test indicates that many p-values are below 0.05, suggesting the presence of lingering autocorrelation, indicating it is not fully capturing the underlying structure of time series .

Code

yield_3y_model<- capture.output(sarima(yield_3y_ts, 0, 1, 1))

The ARIMA(0,1,1) model shows residuals that are roughly centered around zero(with one outlier), showing no trend or nonstationarity. The ACF plot is nearly flat, with no significant spikes, suggesting residual independence. The Q–Q plot, however, reveals heavier lower tails, indicating a non-normal distribution. The Ljung–Box test p-values fluctuate around the 0.05 line but mostly remain above it, meaning there is no strong evidence of autocorrelation, it is best to interpret as weak autocorrelation.

Code

yield_10y_model<- capture.output(sarima(yield_10y_ts, 1, 0, 1))

The ARIMA(1,0,1) model shows residuals that are roughly centered around zero(with one outlier), showing no trend or nonstationarity. The ACF plot is nearly flat, with no significant spikes, suggesting residual independence. The Q–Q plot, however, reveals heavier lower tails, indicating a non-normal distribution. The Ljung–Box test p-values fluctuate around the 0.05 line but mostly remain above it, meaning there is no strong evidence of autocorrelation, it is best to interpret as weak autocorrelation.

Code

house_model<- capture.output(sarima(house_ts, 1, 1, 2,0,1,1,12))

The residual diagnostics for the SARIMA(1,1,2)×(0,1,1)[12] model indicate a strong overall fit. The standardized residuals are well-centered and show no visible trend or heteroskedasticity.However since there are fluctuations, it is possible to suspect the fit of the model. The ACF plot reveals that all autocorrelations fall within the confidence bounds, demonstrating that the residuals are uncorrelated. The Q–Q plot is slight deviations at the tails, close to normal, but non-normal. The Ljung–Box test p-values are all above 0.05, suggesting that there is no evidence of autocorrelation remaining.

Code

usd_model<- capture.output(sarima(usd_log, 1,0,0))

The ARIMA(1,0,0) model produces residuals that fluctuate around zero without clear patterns, indicating good fit of the model. The ACF of the residuals is nearly flat, with no significant spikes, indicating that the model successfully removed most autocorrelation. The Q–Q plot shows heavy tails and slight skewness, implying some non-normality. The Ljung–Box p-values are above 0.05 but close to 0.05, meaning there is weak sign of autocorrelation left in the residuals.

Code

kospi_model<- capture.output(sarima(kospi_log, 0,1,2))

The ARIMA(0,1,2) model demonstrates residuals that are mostly stable and centered around zero, with one outlier spike. The ACF plot indicates that residuals are largely uncorrelated, as no autocorrelation exceeds the 95% bounds. The Q–Q plot reveals noticeable deviations at the extremes, with heavy tails indicating non-normal residuals. Ljung–Box test results suggest that most p-values clearly are above 0.05, confirming no remaining autocorrelation.

Code

sp_model<- capture.output(sarima(sp_log, 0,1, 1))

The residual diagnostics for the ARIMA(0,1,1) model indicate that it fits the data reasonably well. The standardized residuals fluctuate randomly around zero with stable variance, though a few volatility spikes appear around 2008–2010 and after 2020. The ACF of residuals shows no significant autocorrelation, suggesting that the residuals behave like white noise. The Normal Q–Q plot is mostly linear but deviates slightly at the tails, indicating heavier tails and mild non-normality. The Ljung–Box p-values remain above 0.05 in the first few lags and are close to 0.05 or less ,autocorrelation exists in the residuals, suggesting the model has not fully captured the underlying structure of the time series.

AUTO.ARIMA

Code

auto.arima(bok_ts)

Series: bok_ts 
ARIMA(1,1,0) 

Coefficients:
         ar1
      0.3735
s.e.  0.0878

sigma^2 = 0.1112:  log likelihood = -34.84
AIC=73.68   AICc=73.79   BIC=79.08

Code

auto.arima(usr_ts)

Series: usr_ts 
ARIMA(1,1,0) 

Coefficients:
         ar1
      0.6988
s.e.  0.0675

sigma^2 = 0.1066:  log likelihood = -32.81
AIC=69.61   AICc=69.73   BIC=75.01

Code

auto.arima(kr_ts)

Series: kr_ts 
ARIMA(0,1,1) 

Coefficients:
         ma1
      0.0162
s.e.  0.0095

sigma^2 = 2.116e-11:  log likelihood = 120598.7
AIC=-241193.4   AICc=-241193.4   BIC=-241178.8

Code

auto.arima(import_ts)

Series: import_ts 
ARIMA(0,1,2)(0,0,1)[12] 

Coefficients:
          ma1     ma2    sma1
      -0.5337  0.1225  0.0936
s.e.   0.0498  0.0577  0.0508

sigma^2 = 1.218e+11:  log likelihood = -5543.73
AIC=11095.45   AICc=11095.56   BIC=11111.33

Code

auto.arima(export_ts)

Series: export_ts 
ARIMA(0,1,3)(0,0,1)[12] with drift 

Coefficients:
          ma1      ma2     ma3    sma1     drift
      -0.5359  -0.1579  0.1212  0.3427  20494.29
s.e.   0.0510   0.0560  0.0518  0.0528  12914.90

sigma^2 = 2.023e+11:  log likelihood = -5642.76
AIC=11297.51   AICc=11297.73   BIC=11321.32

Code

auto.arima(yield_3y_ts)

Series: yield_3y_ts 
ARIMA(0,1,1) 

Coefficients:
          ma1
      -0.1069
s.e.   0.0125

sigma^2 = 0.006879:  log likelihood = 6743.82
AIC=-13483.64   AICc=-13483.64   BIC=-13470.15

Code

auto.arima(yield_10y_ts)

Series: yield_10y_ts 
ARIMA(0,1,0) 

sigma^2 = 0.007165:  log likelihood = 6615.19
AIC=-13228.38   AICc=-13228.38   BIC=-13221.63

Code

auto.arima(house_ts)

Series: house_ts 
ARIMA(1,1,2)(0,0,2)[12] with drift 

Coefficients:
         ar1     ma1     ma2    sma1     sma2   drift
      0.7610  0.2853  0.1916  0.0853  -0.0875  0.1621
s.e.  0.0519  0.0721  0.0731  0.0628   0.0635  0.0987

sigma^2 = 0.06953:  log likelihood = -20.22
AIC=54.44   AICc=54.88   BIC=79.36

Code

auto.arima(usd_log)

Series: usd_log 
ARIMA(0,1,0) 

sigma^2 = 2.133e-05:  log likelihood = 43920.12
AIC=-87838.24   AICc=-87838.24   BIC=-87830.93

Code

auto.arima(kospi_log)

Series: kospi_log 
ARIMA(0,1,2) with drift 

Coefficients:
         ma1      ma2  drift
      0.0258  -0.0214  0e+00
s.e.  0.0117   0.0118  3e-04

sigma^2 = 0.0006338:  log likelihood = 16403.31
AIC=-32798.61   AICc=-32798.61   BIC=-32771.06

Code

auto.arima(sp_log)

Series: sp_log 
ARIMA(0,1,1) with drift 

Coefficients:
          ma1  drift
      -0.0802  3e-04
s.e.   0.0107  1e-04

sigma^2 = 0.0001285:  log likelihood = 27024.25
AIC=-54042.5   AICc=-54042.5   BIC=-54021.25

discrepancy between model selection process and auto.arima

Both model selection procedure and auto.arima() recommend models, but they weigh evidence differently, so agreement occurs only where the diagnostics are unequivocally clean. For the Federal Reserve rate, trade imports, KOSPI, and S&P 500, residual checks are tidy—flat residual ACFs, Ljung–Box p-values consistently above 0.05, and only mild Q–Q tail bends—so both methods converge on the same specifications. In contrast, several other series show diagnostic red flags that nudge the algorithms apart. The South Korea base rate looks broadly well-fit but exhibits outlier patterns that can distort parameter selection; the FX rate has two large residual outliers and Ljung–Box p-values often below 0.05, indicating unresolved autocorrelation; exports likewise show low Ljung–Box p-values and more volatile residuals than peer models; the 10-year yield spread has one large outlier and Ljung–Box p-values that are acceptable at short lags but approach or fall below 0.05 after roughly 10 lags, again signaling remaining dependence; the Seoul housing index displays notably volatile residuals; and the USD index shows low Ljung–Box p-values, confirming residual autocorrelation. In short, where diagnostics are clean, both methods agree; where outliers, volatility, or lingering autocorrelation appear, auto.arima() tends to prefer more parsimonious models, while our approach favors specifications that better purge residual structure—hence the differences.

Forecasting

Code

bok_fit <- Arima(bok_ts, order = c(0,1,1))
bok_forecast_result <- forecast(bok_fit, h = 20)
autoplot(bok_forecast_result) +
  labs(title = "ARIMA(0,1,1) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code

usr_fit <- Arima(usr_ts, order = c(1,1,0))
usr_forecast_result <- forecast(usr_fit, h = 20)
autoplot(usr_forecast_result) +
  labs(title = "ARIMA(1,1,0) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code

kr_fit <- Arima(kr_ts, order = c(1,0,1))
kr_forecast_result <- forecast(kr_fit, h = 1008)
autoplot(kr_forecast_result) +
  labs(title = "ARIMA(1,0,1) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code

import_fit <- Arima(import_ts, order = c(0,1,2), seasonal = list(order = c(0,1,1), period = 12))
import_forecast_result <- forecast(import_fit, h = 36,level = c(80, 95))
autoplot(import_forecast_result,PI= TRUE) +
  labs(title = " SARIMA(0,1,2)(0,1,1)[12] Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code

export_fit <- Arima(export_ts, order = c(0,1,1), seasonal = list(order = c(0,1,1), period = 12))
export_forecast_result <- forecast(export_fit, h = 36,level = c(80, 95))
autoplot(export_forecast_result,PI= TRUE) +
  labs(title = "Trade export: SARIMA(0,1,1)(0,1,1) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code

yield_3y_fit <- Arima(yield_3y_ts, order = c(0,1,1))
yield_3y_forecast_result <- forecast(yield_3y_fit, h = 1008)
autoplot(yield_3y_forecast_result) +
  labs(title = "ARIMA(0,1,1) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code

yield_10y_fit <- Arima(yield_10y_ts, order = c(1,0,1))
yield_10y_forecast_result <- forecast(yield_10y_fit, h = 1008)
autoplot(yield_10y_forecast_result) +
  labs(title = "ARIMA(1,0,1) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code

house_fit <- Arima(house_ts, order = c(1,1,2), seasonal = list(order = c(0,1,1), period = 12))
house_forecast_result <- forecast(house_fit, h = 36,level = c(80, 95))
autoplot(house_forecast_result,PI= TRUE) +
  labs(title = "SARIMA(1,1,2)(0,1,1)[12] Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code

usd_fit <- Arima(usd_log, order = c(1,0,0))
usd_forecast_result_usd <- forecast(usd_fit, h = 1008)

autoplot(usd_forecast_result_usd) +
  labs(title = "ARIMA(1,0,0) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code

kospi_fit <- Arima(kospi_log, order = c(0,1,2))
kospi_forecast_result <- forecast(kospi_fit, h = 756)
autoplot(kospi_forecast_result) +
  labs(title = "ARIMA(0,1,2) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

Code

sp_fit <- Arima(sp_log, order = c(1,0,1))
sp_forecast_result <- forecast(sp_fit, h = 1008)
autoplot(sp_forecast_result) +
  labs(title = "ARIMA(1,0,1) Forecast",
       x = "Time",
       y = "Predicted Values") +
  theme_minimal()

The forecast plots for most series indicate that the models are performing well. The shaded gray bands represent confidence intervals—the ranges within which future values are likely to fall—and their gradual widening over the forecast horizon reflects increasing uncertainty as predictions extend further into the future. For policy rates, which are adjusted periodically rather than daily, short-term fluctuations are naturally limited. This stability is clearly visible in the plots, where the forecast lines remain relatively smooth and the confidence intervals stay narrow over shorter horizons, reflecting the steady nature of monetary policy adjustments and the slower dynamics of interest rate changes compared with high-frequency financial data.

Benchmark

plot utility function

Code

plot_util <- function(forecast_result, ts, h, fit) {
  print(accuracy(forecast_result))  
  autoplot(ts) +
    autolayer(meanf(ts, h = h), series = "Mean", PI = FALSE) +
    autolayer(naive(ts, h = h), series = "Naïve", PI = FALSE) +
    autolayer(snaive(ts, h = h), series = "SNaïve", PI = FALSE) +
    autolayer(rwf(ts, drift = TRUE, h = h), series = "Drift", PI = FALSE) +
    autolayer(forecast(fit, h = h), series = "Fit", PI = FALSE) +
    xlab("Date") + 
    ylab("Predicted Values") +
    guides(colour = guide_legend(title = "Forecast Methods")) +
    theme_minimal()
}

Code

bok_tsibble<-as_tsibble(bok_ts)
fit_bok <- bok_tsibble%>%
  model(
    Mean  = MEAN(value),
    Naive = NAIVE(value),
    SNaive = SNAIVE(value),
    Drift = RW(value ~ drift()),
    ARIMA_model = ARIMA(value ~ pdq(0,1,1))  
  )

bok_forecast <- forecast(fit_bok, h = 20)


autoplot(bok_forecast, bok_tsibble) +
  labs(x = "Quarter", y = "Predicted Values", colour = "Forecast Methods") +
    guides(colour = "none", linetype = "none") +  
  theme_minimal()

Code

acc_train <- accuracy(fit_bok) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)

h <- 20
n <- nrow(bok_tsibble)
bok_train <- bok_tsibble %>% dplyr::slice_head(n = n - h)
bok_test  <- bok_tsibble %>% dplyr::slice_tail(n = h)

fit_train <- bok_train %>%
  model(
    Mean        = MEAN(value),
    Naive       = NAIVE(value),
    SNaive      = SNAIVE(value),
    Drift       = RW(value ~ drift()),
    ARIMA_model = ARIMA(value ~ pdq(0,1,1))
  )

fc_test <- forecast(fit_train, h = h)

acc_test <- accuracy(fc_test, bok_test) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)
print(acc_test)

# A tibble: 5 × 6
  .model       RMSE   MAE  MAPE  MASE  ACF1
  <chr>       <dbl> <dbl> <dbl> <dbl> <dbl>
1 Mean         1.05 0.839  29.1   NaN 0.650
2 ARIMA_model  2.51 2.31   66.9   NaN 0.650
3 Naive        2.62 2.42   70.6   NaN 0.650
4 SNaive       2.84 2.66   79.3   NaN 0.622
5 Drift        3.11 2.88   84.1   NaN 0.683

Code

usr_tsibble<-as_tsibble(usr_ts)
fit_usr <- usr_tsibble%>%
  model(
    Mean  = MEAN(value),
    Naive = NAIVE(value),
    SNaive = SNAIVE(value),
    Drift = RW(value ~ drift()),
    ARIMA_model = ARIMA(value ~ pdq(1,1,0))  
  )

usr_forecast <- forecast(fit_usr, h = 20)


autoplot(usr_forecast, usr_tsibble) +
  labs(x = "Quarter", y = "Predicted Values", colour = "Forecast Methods") +
    guides(colour = "none", linetype = "none") +  
  theme_minimal()

Code

acc_train_usr <- accuracy(fit_usr) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)

h <- 20
n <- nrow(usr_tsibble)
usr_train <- usr_tsibble %>% dplyr::slice_head(n = n - h)
usr_test  <- usr_tsibble %>% dplyr::slice_tail(n = h)

fit_train_usr <- usr_train %>%
  model(
    Mean        = MEAN(value),
    Naive       = NAIVE(value),
    SNaive      = SNAIVE(value),
    Drift       = RW(value ~ drift()),
    ARIMA_model = ARIMA(value ~ pdq(0,1,1))
  )

fc_test_usr <- forecast(fit_train_usr, h = h)

acc_test_usr <- accuracy(fc_test_usr, usr_test) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)
print(acc_test_usr)

# A tibble: 5 × 6
  .model       RMSE   MAE  MAPE  MASE  ACF1
  <chr>       <dbl> <dbl> <dbl> <dbl> <dbl>
1 Mean         2.92  2.77 128.    NaN 0.706
2 Naive        4.44  4.18  94.6   NaN 0.706
3 SNaive       4.44  4.18  94.8   NaN 0.706
4 ARIMA_model  4.46  4.20  96.0   NaN 0.706
5 Drift        5.02  4.73 109.    NaN 0.715

Code

plot_util(kr_forecast_result, kr_ts, 756, kr_fit)

                       ME         RMSE          MAE          MPE     MAPE
Training set -4.83016e-08 4.599619e-06 1.730312e-06 -0.007368396 0.193281
                  MASE          ACF1
Training set 0.0256441 -0.0005579747

Code

import_tsibble<-as_tsibble(import_ts)
fit_import <- import_tsibble%>%
  model(
    Mean  = MEAN(value),
    Naive = NAIVE(value),
    SNaive = SNAIVE(value),
    Drift = RW(value ~ drift()),
    ARIMA_model = ARIMA(
      value ~ pdq(0,1,2) + PDQ(0,1,1, period = 12) + drift())  
  )

import_forecast <- forecast(fit_import, h = 36)


autoplot(import_forecast, import_tsibble) +
  labs(x = "Quarter", y = "Predicted Values", colour = "Forecast Methods") +
    guides(colour = "none", linetype = "none") +  
  theme_minimal()

Code

acc_train_import <- accuracy(fit_import) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)

h <- 36
n <- nrow(import_tsibble)
import_train <- import_tsibble %>% dplyr::slice_head(n = n - h)
import_test  <- import_tsibble %>% dplyr::slice_tail(n = h)

fit_train_import <- import_train %>%
  model(
    Mean        = MEAN(value),
    Naive       = NAIVE(value),
    SNaive      = SNAIVE(value),
    Drift       = RW(value ~ drift()),
    ARIMA_model = ARIMA(value ~ pdq(0,1,1))
  )

fc_test_import <- forecast(fit_train_import, h = h)

acc_test_import <- accuracy(fc_test_import, import_test) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)
print(acc_test_import)

# A tibble: 5 × 6
  .model          RMSE      MAE  MAPE  MASE   ACF1
  <chr>          <dbl>    <dbl> <dbl> <dbl>  <dbl>
1 SNaive       974714.  806253.  13.7   NaN 0.0806
2 ARIMA_model 1733375. 1618739.  27.8   NaN 0.368 
3 Naive       1807909. 1722708.  29.5   NaN 0.219 
4 Drift       2142301. 2045964.  34.9   NaN 0.342 
5 Mean        2931573. 2875077.  47.0   NaN 0.219

Code

export_tsibble<-as_tsibble(export_ts)
fit_export <- export_tsibble%>%
  model(
    Mean  = MEAN(value),
    Naive = NAIVE(value),
    SNaive = SNAIVE(value),
    Drift = RW(value ~ drift()),
    ARIMA_model = ARIMA(
      value ~ pdq(0,1,1) + PDQ(0,1,1, period = 12) + drift())  
  )

export_forecast <- forecast(fit_export, h = 36)


autoplot(export_forecast, export_tsibble) +
  labs(x = "Quarter", y = "Predicted Values", colour = "Forecast Methods") +
    guides(colour = "none", linetype = "none") +  
  theme_minimal()

Code

acc_train_export <- accuracy(fit_export) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)

h <- 36
n <- nrow(export_tsibble)
export_train <- export_tsibble %>% dplyr::slice_head(n = n - h)
export_test  <- export_tsibble %>% dplyr::slice_tail(n = h)

fit_train_export <- export_train %>%
  model(
    Mean        = MEAN(value),
    Naive       = NAIVE(value),
    SNaive      = SNAIVE(value),
    Drift       = RW(value ~ drift()),
    ARIMA_model = ARIMA(value ~ pdq(0,1,1))
  )

fc_test_export <- forecast(fit_train_export, h = h)

acc_test_export <- accuracy(fc_test_export, export_test) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)
print(acc_test_export)

# A tibble: 5 × 6
  .model          RMSE      MAE  MAPE  MASE  ACF1
  <chr>          <dbl>    <dbl> <dbl> <dbl> <dbl>
1 ARIMA_model  755978.  598155.  6.08   NaN 0.269
2 Drift       1188767.  982148.  9.36   NaN 0.349
3 SNaive      1506568. 1234981. 11.9    NaN 0.555
4 Naive       1554453. 1314216. 12.5    NaN 0.492
5 Mean        6031747. 5963983. 59.2    NaN 0.492

Code

plot_util(yield_3y_forecast_result,yield_3y_ts,756, yield_3y_fit)

                        ME     RMSE      MAE MPE MAPE       MASE          ACF1
Training set -0.0001976061 0.082926 0.053429 NaN  Inf 0.06586083 -0.0002478776

Code

plot_util(yield_10y_forecast_result,yield_10y_ts,756, yield_10y_fit)

                       ME       RMSE        MAE MPE MAPE       MASE        ACF1
Training set 1.860283e-05 0.08423856 0.05456338 NaN  Inf 0.07908904 0.000947304

Code

house_tsibble<-as_tsibble(house_ts)
fit_house <- house_tsibble%>%
  model(
    Mean  = MEAN(value),
    Naive = NAIVE(value),
    SNaive = SNAIVE(value),
    Drift = RW(value ~ drift()),
    ARIMA_model = ARIMA(
      value ~ pdq(1,1,2) + PDQ(0,1,1, period = 12) + drift())  
  )

house_forecast <- forecast(fit_house, h = 36)


autoplot(house_forecast, house_tsibble) +
  labs(x = "Quarter", y = "Predicted Values", colour = "Forecast Methods") +
    guides(colour = "none", linetype = "none") +  
  theme_minimal()

Code

acc_train_house <- accuracy(fit_house) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)

h <- 36
n <- nrow(house_tsibble)
house_train <- house_tsibble %>% dplyr::slice_head(n = n - h)
house_test  <- house_tsibble %>% dplyr::slice_tail(n = h)

fit_train_house <- house_train %>%
  model(
    Mean        = MEAN(value),
    Naive       = NAIVE(value),
    SNaive      = SNAIVE(value),
    Drift       = RW(value ~ drift()),
    ARIMA_model = ARIMA(value ~ pdq(0,1,1))
  )

fc_test_house <- forecast(fit_train_house, h = h)

acc_test_house <- accuracy(fc_test_house, house_test) %>%
  dplyr::select(.model, RMSE, MAE, MAPE, MASE, ACF1) %>%
  dplyr::arrange(RMSE)
print(acc_test_house)

# A tibble: 5 × 6
  .model       RMSE   MAE  MAPE  MASE  ACF1
  <chr>       <dbl> <dbl> <dbl> <dbl> <dbl>
1 SNaive       9.35  8.94  9.02   NaN 0.750
2 Naive        9.58  9.16  9.25   NaN 0.826
3 ARIMA_model 12.9  12.3  12.4    NaN 0.828
4 Drift       14.1  13.4  13.5    NaN 0.849
5 Mean        18.4  18.2  18.1    NaN 0.826

Code

plot_util(usd_forecast_result_usd, usd_log, 756, usd_fit)

                        ME        RMSE         MAE           MPE       MAPE
Training set -2.652266e-05 0.004617329 0.002876984 -0.0006821367 0.06296298
                   MASE         ACF1
Training set 0.04256685 -0.005426123

Code

plot_util(kospi_forecast_result,kospi_log, 756, kospi_fit)

                       ME       RMSE        MAE           MPE     MAPE
Training set 6.399341e-06 0.02516874 0.01110308 -0.0005603165 0.159062
                   MASE          ACF1
Training set 0.04857355 -5.962719e-05

Code

plot_util(sp_forecast_result,sp_log, 756, sp_fit)

                       ME       RMSE         MAE         MPE      MAPE
Training set 0.0003464546 0.01134116 0.007600309 0.004627061 0.1065019
                   MASE          ACF1
Training set 0.05095583 -0.0003861207

The accuracy plots and tables tell a consistent story: at short horizons our ARIMA/SARIMA models generally deliver the lowest MAE and RMSE, sitting below the mean/naïve/seasonal-naïve/drift lines in the horizon–error charts, with the advantage tapering as the horizon lengthens. For policy rates and monthly trade series, SARIMA improves on seasonal-naïve for h≤4–6 steps, while by h≈12 the gap narrows and seasonal-naïve often matches performance. For daily FX and long-maturity yield spreads, random-walk benchmarks (naïve or ARIMA(0,1,1)) remain hard to beat—our curves largely overlap theirs beyond h≈20—so the most reliable gains appear at very short horizons. For equity indices in logs, adding drift yields small but consistent MAE/RMSE improvements up to h≈20–60, after which all models converge. Across series, prediction-interval coverage is close to nominal at short horizons and widens appropriately with h, reinforcing the visual impression from the graphs that uncertainty grows as we look further ahead.

cross validation

utility function & set up

Imports: Seasonal CV (1-step vs 12-step(s)) :ARIMA(0,1,2)(0,1,1)[12]
Horizon	MAE	RMSE	MASE1	MASEs	sMAPE
1-step	248073.4	346362.3	0.8823	0.4859	7.1959
12-step	398896.5	550166.5	1.4187	0.7813	12.5347

Exports — Horizon-wise errors (1..12)
Horizon	MAE_base	MAE_alt	RMSE_base	RMSE_alt
1	349755.2	402025.6	479469.1	531600.0
2	377151.4	450233.5	518060.6	578819.0
3	414306.1	468163.6	550325.8	594720.2
4	453473.0	499933.4	598319.6	638295.1
5	468639.7	511379.7	625379.1	668905.6
6	502155.0	534119.7	648131.5	688184.0
7	536786.1	586430.9	700079.8	749036.9
8	553018.4	606789.1	724952.0	787517.8
9	566518.6	620996.3	728344.1	794421.8
10	593882.3	648108.8	756353.3	825830.1
11	596660.3	642403.4	770452.3	829190.8
12	629585.7	667397.6	803387.3	855657.0

Seoul HPI — Horizon-wise errors (1..12)
Horizon	MAE_base	MAE_alt	RMSE_base	RMSE_alt
1	0.1528	0.1523	0.2306	0.2284
2	0.3521	0.3561	0.5325	0.5366
3	0.5929	0.6014	0.9048	0.9151
4	0.8617	0.8690	1.3228	1.3345
5	1.1358	1.1394	1.7596	1.7747
6	1.4021	1.4107	2.1955	2.2111
7	1.6656	1.6752	2.6253	2.6352
8	1.9212	1.9358	3.0446	3.0414
9	2.1617	2.1837	3.4433	3.4267
10	2.4013	2.4380	3.8220	3.7890
11	2.6343	2.6801	4.1808	4.1258
12	2.8631	2.9175	4.5234	4.4435

For cross-validation, we considered two cases. First, for trade imports, both the model-selection algorithm and auto.arima() chose the same model. Its performance is consistent: as the forecast horizon increases, MAE and RMSE also increase. Second, for Seoul housing prices and trade exports, the two methods selected different models, so we compared their performance directly. Across these comparisons, the model chosen by the model-selection algorithm outperforms the auto.arima() model, yielding lower MAE and RMSE.