Rで混合マルコフ劣化ハザードモデル

水原希子が可愛すぎて辛い

あっ, どーも僕です.

社会基盤施設の安全で最適な維持管理には劣化予測によるリスク評価が必要となります. ここでは, 数ある劣化予測のための統計モデルのうち, スタンダードな「混合マルコフ劣化ハザードモデルのベイズモデル」のRのスクリプトを載せたいと思います.

論文はこちらから

「混合マルコフ劣化ハザードモデルのベイズモデル」は, 混合, マルコフ劣化ハザードモデル, ベイズモデルにわけられ, 混合, ベイズモデルは一般的な意味となります. マルコフ劣化ハザードモデルとはなにかといえば, 一言でいうとマルコフ性を仮定したハザードモデルという意味です.

ここで述べなければならないのは, なぜマルコフ劣化ハザードモデルという一般には聞き慣れない統計モデルが, スタンダードとなっているかです. それは社会基盤施設の劣化が健全度という多水準の順序尺度時系列データで評価されることが理由です. 劣化予測とはつまり健全度ごとの生存期間の評価を意味しますが, 一般に健全度がいつ劣化したのかは観測できません. また, 健全度は時間に依存して一方向(劣化)に遷移していきます. このような他分野であまりみられないデータ構造を表現するため, マルコフ劣化ハザードモデルが開発されました. このあたりの議論は下記論文を参照してください.

論文はこちらから

以下は, Rのスクリプトと, その利用例のスクリプトです. 工夫した点はExcelサンプラーパラメータを管理するために, rstan::samplingにpurrr::liftを利用しいることです. 疑問・不備はそれとなく言及してください.

# MarkovHazardModel_MixedExp_v3.stan
data {
    int<lower=1> size_data;
    int<lower=1> s_worst;
    int<lower=1> s_pre[size_data];
    int<lower=1> s_pos[size_data];
    int<lower=1> Z[size_data];
    int<lower=1> index_hetero[size_data];
    int<lower=1> size_hetero;
    int<lower=1> size_variable;
    matrix[size_data, size_variable] variables;
}

transformed data {
    real BETA_BOUND;
    real EPS;
    real PHI_MAX;
    real HETERO_MAX;
    real PENALTY;
    real HPAR_MAX;
    BETA_BOUND <- 30.;
    EPS        <- 0.;
    HETERO_MAX <- 10.;
    PHI_MAX    <- 100.;
    HPAR_MAX   <- 100.;
    PENALTY    <- -100.;
}

parameters {
    matrix<lower=-BETA_BOUND,upper=BETA_BOUND>[size_variable, s_worst - 1] Beta;
    vector<lower=EPS,upper=HETERO_MAX>[size_hetero] Hetero;
    real<lower=EPS,upper=PHI_MAX> phi;
    real<lower=EPS,upper=HPAR_MAX> shape;
    real<lower=EPS,upper=HPAR_MAX> rate;
}

transformed parameters {

}

model {
    vector [size_data] LogLikelihood;
    real prod_1;
    real prod_2;
    real transitionProb;
    real transitionProb_sum;
    int  i;
    int  j;
    matrix[size_data, s_worst - 1] Theta;
    Theta <- exp (variables * Beta);

    for (n in 1:size_data)
    {
        i <- s_pre[n];
        j <- s_pos[n];
        if (i == s_worst) {
            LogLikelihood[n] <- 0.0;
        } else if (j == s_worst) {
            transitionProb_sum <- 0.0;
            for (h in i:(j-1))
            {
                transitionProb <- 0.0;
                for (k in i:h)
                {
                    prod_1 <- 1.0;
                    prod_2 <- 1.0;
                    if (k != i) {
                        for (m in i:(k-1))
                        {
                            prod_1 <- prod_1 * Theta[n, m] / (Theta[n, m] - Theta[n, k]);
                        }
                    }
                    if (k != h) {
                        for (m in k:(h-1))
                        {
                            prod_2 <- prod_2 * Theta[n, m] / (Theta[n, m+1] - Theta[n, k]);
                        }
                    }
                    transitionProb <- transitionProb + prod_1 * prod_2 * exp (- Hetero[index_hetero[n]] * Theta[n, k] * Z[n]);
                }
                transitionProb_sum <- transitionProb_sum + transitionProb;
            }
            LogLikelihood[n] <- if_else (transitionProb_sum > 1. || transitionProb_sum <= 0.0, PENALTY, log1m(transitionProb_sum));
        } else {
            transitionProb <- 0.0;
            for (k in i:j)
            {
                prod_1 <- 1.0;
                prod_2 <- 1.0;
                if (k != i) {
                    for (m in i:(k-1))
                    {
                        prod_1 <- prod_1 * Theta[n, m] / (Theta[n, m] - Theta[n, k]);
                    }
                }
                if (k != j) {
                    for (m in k:(j-1))
                    {
                        prod_2 <- prod_2 * Theta[n, m] / (Theta[n, m+1] - Theta[n, k]);
                    }
                }
                transitionProb <- transitionProb + prod_1 * prod_2 * exp (- Hetero[index_hetero[n]] * Theta[n, k] * Z[n]);
            }
            LogLikelihood[n] <- if_else (transitionProb > 1. || transitionProb <= 0.0, PENALTY, log(transitionProb));
        }
    }

    // prior paramter
    Hetero ~ gamma (phi, phi);

    // hyper prior paramter
    phi ~ gamma (shape, rate);

    // increment log probability
    increment_log_prob (sum (LogLikelihood));
}

generated quantities {

}

# MarkovHazardModel_BayesMixedExp_v3.4.beta.r
MarkovHazardModel_BayesMixedExp <- setRefClass (

    Class   = "MarkovHazardModel_BayesMixedExp",

    fields  = c (Data_              = "list",
                 CompiledCode_      = "list",
                 SamplerParameters_ = "list",
                 Stanfit_           = "list",
                 Hats_              = "list",
                 EPS_               = "numeric"),

    methods = list (
        initialize = function (loadLibrary = FALSE)
        {
            initFields (Data_              = list (),
                        CompiledCode_      = list (),
                        SamplerParameters_ = list (),
                        Stanfit_           = list (),
                        EPS_               = 1e-9)
            if(loadLibrary) loadLibs()
        },
        loadLibs = function ()
        {
            target_libs  <- c ("ggplot2", "Rcpp", "parallel", "rstan", "readr", "readxl", "matrixStats", "purrr")
            install_libs <- setdiff (target_libs, .packages(all.available = TRUE))
            for (lib in install_libs)
                install.packages (lib, repos = "http://cran.ism.ac.jp", depend = TRUE)
            for (lib in target_libs)
                suppressPackageStartupMessages (library (lib, character.only = TRUE, warn.conflicts = FALSE, quietly = TRUE))
            rstan_options(auto_write = TRUE)
            options(mc.cores = parallel::detectCores())
        },
        readSamplerParameters = function (path = "./par/par.xlsx", sheetName = "samplerParameters")
        {
            cnvtr <- list ("dbl" = as.numeric, "chr" = as.character, "int" = as.integer)
            par   <- readxl::read_excel(path, sheet = sheetName, col_names = TRUE)
            args  <- par[["arg"]]
            vals  <- par[["val"]]
            types <- par[["type"]]
            SamplerParameters_ <<- list()
            Map(function(a, v, t) SamplerParameters_[[a]] <<- cnvtr[[t]](v), args, vals, types)
        },
        readData = function (path = "./par/par.xlsx", sheetName = "data", s_worst = 5)
        {
            data                      <-  readxl::read_excel (path, sheet = sheetName, col_names = TRUE)
            Data_[["size_data"]]      <<- nrow (data)
            Data_[["s_worst"]]        <<- s_worst
            Data_[["s_pre"]]          <<- data$I
            Data_[["s_pos"]]          <<- data$J
            Data_[["Z"]]              <<- data$Z
            Data_[["index_hetero"]]   <<- data$index_hetero
            Data_[["size_hetero"]]    <<- length (unique (Data_[["index_hetero"]]))
            Data_[["size_variable"]]  <<- length (grep ("^v_", colnames (data)))
            Data_[["variables"]]      <<- data [grep ("^v_", colnames (data))]
        },
        compileStanCode = function (pathStanCode =  "./src/MarkovHazardModel_MixedExp_v3.stan")
        {
            CompiledCode_[["code"]]     <<- stan_model (file = pathStanCode)
            CompiledCode_[["compiled"]] <<- !is.null (CompiledCode_[["code"]])
            saveCompiledStanCode (paste0(pathStanCode, ".compiled.RDS"))
        },
        saveCompiledStanCode = function (path)
        {
            saveRDS (bemh$CompiledCode_[["code"]], path)
        },
        readCompiledStanCode = function (path)
        {
            CompiledCode_[["code"]]     <<- NULL
            CompiledCode_[["code"]]     <<- readRDS (path)
            CompiledCode_[["compiled"]] <<- TRUE
        },
        fitStan = function ()
        {
            if (CompiledCode_[["compiled"]]) {
                stan_args          <- SamplerParameters_
                stan_args$object   <- CompiledCode_[["code"]]
                stan_args$data     <- Data_
                Stanfit_[["fit"]] <<- lift(rstan::sampling)(stan_args)
            }
        },
        calcHats = function ()
        {
            BetaSamples                 <-  extract (Stanfit_$fit, pars = "Beta", permuted = TRUE)
            Hats_[["medianBetaHat"]]    <<- apply (BetaSamples$Beta, c (2, 3), median)
            ThetaHats                   <-  as.matrix (Data_$variables) %*% Hats_[["medianBetaHat"]]
            Hats_[["medianThetaHat"]]   <<- exp (c (matrixStats::colMedians(ThetaHats)))
            HeteroSamples               <-  extract (Stanfit_$fit, pars = "Hetero", permuted = TRUE)
            Hats_[["medianHeteroHats"]] <<- matrixStats::colMedians (HeteroSamples$Hetero)
            Hats_[["HeteroThetaHat"]]   <<- sweep (x = exp (ThetaHats), STATS = c(Hats_[["medianHeteroHats"]])[Data_$index_hetero], MARGIN = 1, FUN = "*")
            Hats_[["MeanTransitionProbMatrix"]] <<- calcTransitionProbMatrix (Hats_[["medianThetaHat"]], Data_[["s_worst"]], median(Hats_[["medianHeteroHats"]]))
        },
        calcTransitionProbMatrix = function (theta, s_worst, hetero)
        {
            if (is.null (s_worst)) return (diag (nrow = 5))
            out <- diag (nrow = s_worst)
            for (i in 1:(s_worst-1)) {
                for (j in i:s_worst) {
                    if (j == s_worst) {
                        prob_ij <- transitionProb_iworst (i, 1, theta, s_worst, hetero)
                    } else {
                        prob_ij <- transitionProb_ij (i, j, 1, theta, hetero)
                    }
                    out[i,j] <- prob_ij
                }
            }
            return (out)
        },
        transitionProb_ij = function (i, j, z, theta, hetero)
        {
            transitionProb <- 0.0
            for (k in i:j) {
                prod1 <- 1.0
                prod2 <- 1.0
                if (k != i) {
                    for (m in  i:(k-1)) {
                        prod1 <- prod1 * theta[m] / (theta[m] - theta[k])
                    }
                }
                if (k != j) {
                    for (m in k:(j-1)) {
                        prod2 <- prod2 * theta[m] / (theta[m+1] - theta[k])
                    }
                }
                transitionProb <- transitionProb + prod1 * prod2 * exp (- hetero * theta[k] * z)
            }
            return (ifelse (transitionProb <= EPS_ || transitionProb > 1., EPS_, transitionProb))
        },
        transitionProb_iworst = function (i, z, theta, s_worst, hetero)
        {
            transitionProb_worst <- 1
            for (h in i:(s_worst-1)) {
                transitionProb_worst <- transitionProb_worst - transitionProb_ij (i, h, z, theta, hetero)
            }
            return (ifelse (transitionProb_worst <= EPS_ || transitionProb_worst > 1., EPS_, transitionProb_worst))
        }
    )
)


# example.r
source ("./src/MarkovHazardModel_BayesMixedExp_v3.4.beta.r", encoding = "UTF-8")
bemh <- MarkovHazardModel_BayesMixedExp$new (loadLibrary = TRUE)
bemh$readSamplerParameters()
bemh$readData()
# bemh$compileStanCode()
bemh$readCompiledStanCode ("./src/MarkovHazardModel_MixedExp_v3.stan.compiled.RDS")
bemh$fitStan()
bemh$calcHats()

# graph
library (dplyr)
library (matrixStats)
y = bemh$Data_$s_worst:1
x = cumsum (c (0, 1 / bemh$Hats_[["medianThetaHat"]]))
X = bemh$Hats_$HeteroThetaHat %>%
    "/"(1, .) %>%
    cbind (rep (0, nrow (.)), .) %>%
    rowCumsums %>%
    t
plot (x, y, type = "n", xlim = c (0, 100), tcl = .3, xlab = "経年数", ylab = "健全度", las = 1)
abline (h = y, col = "lightgrey")
matlines (X, y, lwd = .8, lty = 3, col = "darkgrey")
points (x, y, type = "l", lwd = 2, col = "darkred")

こんなグラフが出てきます. 誰かの役に立ったら嬉しいです. あっ, サンプルで利用しているデータはこちらです. 流行りでも最新でもないものをなぜと言われると辛いのですがなんとなくということでご容赦を.

f:id:aaaazzzz036:20160528143545p:plain