Rで普通の数値解析その２ - サボタージュ禁止のおさぼり日記

あっ, どーも僕です。

初心者が作成したプログラムは, 計算精度や計算コストの点で十分な精度を得ることができないことが多い. したがって, 実際に利用する場合には, 自信で作成するよりも, 専門家が作成した数値計算ライブラリを利用する方が現実的である.
(p.156)

コツコツ書き溜めてきたので, 機会があれば自分のプログラムを使おうか勘違いしていたため, 痛い言葉です. 肝に銘じます.

Rで普通の数値計算その2

Rで普通の数値計算（微分方程式以外）をコツコツ実装していたのが, 少したまってきたで公開しておきます。

今回の修行本はこちら。
言語処理のための機械学習入門 (自然言語処理シリーズ)
統計的機械学習―生成モデルに基づくパターン認識 (Tokyo Tech Be‐TEXT)
はじめてのパターン認識
 数値計算 (サイエンスライブラリ理工系の数学)
計算による線形代数 (工系数学講座 2)
Rで学ぶクラスタ解析
 井尻敬さんのホームページ
これらか実装できそうなものを, 実装が楽しそうなものを, やってみました。
ただ, はじめてのパターン認識で, パーセプトロンを実装したかったのですが, どうもうまくいかなかったのでまた今度。

作ってみたのこちら。全体的にちゃんと計算できているか自信ないです.

べき乗法による固有値と固有ベクトル１
べき乗法による固有値と固有ベクトル２（こちらのが汎用的）
ヤコビ法による固有値と固有ベクトル
ハウスホルダー変換による三重対角化
ハウスホルダー変換＋二分法による固有値計算
コレスキー分解
QR分解
ムーアペンローズの逆行列（ペンローズの収束計算にもづく。n=5まで)
共役勾配による連立一次方程式の解
SOR法による連立一次方程式の解
ラグランジェ補間
エルミート補間
スプライン補間
Fuzzy-CMeans
はさみうち法による1変数方程式の解
二分割法による1変数方程式の解
線形識別関数
線形判別分析
２次正規分布による判別分析。
ガウシアンカーネルによるヒストグラム
尤度交差検証によるバンド幅の設定
K近傍法によるヒストグラム
パーゼンの窓によるヒストグラム

スクリプト

追記

スクリプトの最新版はこちらから。
Rで普通の数値解析

スクリプトはこちら。ちゃんとできているか自信ないのも混ざってますが...

べき乗法による固有値と固有ベクトル１

# べき乗法で絶対最大固有値と固有ベクトルを求める
# power2の方がいいと思う
myEigen.power <- function (A, x.ini = NULL, epsilon = 1e-18) {
    if (is.null (x.ini)) {
        x.old <- A[, 1, drop = TRUE]
    } else {
        x.old <- x.ini
    }
    gain  <- 1
    k     <- 1
    lambda.old <- 1
    while (gain > epsilon) {
        x.new  <- A %*% x.old
        lambda <- x.new [1] / x.old [1] 
        u      <- x.new / (lambda ** k)
        k      <- k + 1
        gain   <- abs (lambda - lambda.old)
        x.old  <- x.new
        lambda.old <- lambda
    }
    return (list (value = lambda, vector = x.new))
}
# 動作確認
A <- matrix (c (1, 2,
                1, 1),
             2, 2, byrow = TRUE)
print (myEigen.power (A))
# 答えはu = C * (sqrt(2), 1)となる. 成功しています.

べき乗法による固有値と固有ベクトル２（こちらのが汎用的）

# べき乗法で絶対最大固有値と固有ベクトルを求める
# max = FALSEで最小値の固有値も求められる 最小値は, Ainvの最大固有値
# 原点移動により加速させる
# 計算による線形代数
# 次数さげにり次に大きい固有も求められるが, やらない方がいい
myEigen.power2 <- function (A, 
                            p = 0,            # 原点移動
                            max = TRUE,       # 最小固有値がほしい時はFALSE
                            x = NULL,         # 初期値を指定するとき
                            epsilon = 1e-15, 
                            normalize = TRUE) {
    if (!max) {
        # 自分でも逆行列書いたけど, 一応....
        A <- solve (A)
    }
    if (is.null (x)) {
        x <- A[, 1, drop = TRUE]
        x <- x / max (x)
    }   
    B     <- A - p * diag (nrow (A))  
    gain  <- TRUE
    loops <- 1
    while (gain) {
        y      <- crossprod (t (B), x)
        lambda <- max (y)
        x.new  <- y / lambda
        gain   <- max (abs (x.new - x)) > epsilon
        x      <- x.new
        loops  <- loops + 1
    }
    #　固有ベクトルは変わらないのでそのまま正規化
    if (normalize) {
        x <- x / sqrt (c (crossprod (c(x))))
    }
    # 固有値は補正する
    if (p != 0) {
        lambda <- lambda + p
    }
    return (list (value = lambda, vector = x, loops = loops))
}
# 動作確認
A <- matrix (c (2, 1,
                1, 2),
             2, 2, byrow = TRUE)
print (myEigen.power2 (A, max = TRUE, normalize = TRUE))
print (eigen (A))

# 原点移動で早くなっているか確認
print (myEigen.power2 (A, p = 0)$loops)    # 33
print (myEigen.power2 (A, p = 7/9)$loops)  # 17

ヤコビ法による固有値と固有ベクトル(修正あり)

# ヤコビ法による固有値近似
# 実対称行列Aの固有値は実数で、直交していることを利用する
# 固有ベクトルも求めれる

# 非対角要素のうち,　絶対値が最大の要素の位置
getAbsoluteMaxValueLocation <- function (A) {
    # A はn次対称マトリックス
    nr       <- nrow (A)
    diag (A) <- 0
    A        <- abs (A)
    loca     <- which.max (A)
    cM       <- (loca %/% nr) + 1
    rM       <- loca %% nr
    if (rM == 0) {
        rM <- nr
        cM <- cM - 1
    }
    return (list (r = rM, c = cM))
}
# 動作確認
A <- matrix (rnorm (100), 10, 10)
print (getAbsoluteMaxValueLocation (A))

myEigen.Jacobi <- function (A, epsilon = 1e-16) {
    # A は実対称行列
    if (!is.matrix (A)) {
        warning ("出直して来い！")
    } else if (!all (A == t (A))) {
        warning ("出直して来い！")
    } else if (is.complex (A)) {
        warning ("出直して来い！")
    }
    
    nr   <- nrow (A)
    gain <- 1
    EigenVectorMat <- diag (nr) 
    
    while (gain > epsilon) {
        tmp   <- getAbsoluteMaxValueLocation (A)
        r.max <- tmp$r
        c.max <- tmp$c
        
        # Φを計算してcosとsinを計算する
        a.rr <- A [r.max, r.max]
        a.cc <- A [c.max, c.max]
        a.rc <- A [r.max, c.max]
        if (a.rr == a.cc) {
            phi <- pi / 4
            cosphi <- cos (phi)
            sinphi <- sin (phi)
        } else {
            s      <- .5 * (a.rr - a.cc)
            sa     <- sqrt (s ** 2 + a.rc ** 2)
            sign   <- sign (s) # sの符号
            cosphi <- sqrt (.5 *  (1 + sign *  s / sa))
            sinphi <- .5 * sign * a.rc / (sa * cosphi)
        }
        
        # 求めたcosとsinで直交行列Pをつくる
        P <- diag (nr)
        P [r.max, r.max] <- cosphi
        P [c.max, r.max] <- sinphi
        P [r.max, c.max] <- -sinphi
        P [c.max, c.max] <- cosphi
        
        
        # 解の更新
        A <- crossprod (P, crossprod (A, P))
        EigenVectorMat <- EigenVectorMat %*% P
        
        # 収束判定
        At   <- A
        diag (At) <- 0
        gain <- max (abs (At))
        
    }
    values  <- diag (A)
    order   <- order (values, decreasing = TRUE)
    values  <- values [order]
    vectors <- EigenVectorMat[, order]
    vectors <- sweep (vectors, 2, diag(crossprod (vectors)), "/")
    return (list (values = values, vectors = vectors))
    
}
# 動作確認
A <- matrix (c(3, .01, .1, 
               .01, 2, .01,
               .1, .01, 1), 
             3, 3, byrow = TRUE)
cat ("myeigen = ", myEigen.Jacobi (A, epsilon = 1e-9)$values, "\n")
cat ("eigen =", eigen (A)$values, "\n")

ハウスホルダー変換（全体的に修正　2013/11/9）

# ハウスホルダー法による行列の三重対角化
myEigen.Householder <- function (A, digits = 7) {
    nr <- nrow (A)
    nc <- ncol (A)
    if (!is.matrix (A)) {
        warning ("出直して来い！")
    }
    if (nr < 3) {
        warning ("もう出来てるわ！")
    }
    if (nr != nc) {
        warning ("出直して来い！")
    }
    library (Matrix)
    # yはxとノルムが同じになるようにsをつくるo
    for (i in 1:(nr - 2)) {
        x       <- A [i:nr, i, drop = TRUE]
        s       <- c (sqrt (crossprod (x[-1])))
        y       <- numeric (nr - i + 1)
        y[1]    <- x[1]
        y[2]    <- - sign (x[2]) * s
        u       <- x - y
        u       <- u / c (sqrt (crossprod (u)))
        Q       <- diag (nr - i + 1) - 2 * tcrossprod (u)
        H       <- as.matrix (bdiag(Diagonal(i - 1), Q))
        A       <- crossprod (H, tcrossprod (A, H))
    }  
    return (round (A, 10))
}
# 動作確認
# 実際に固有値を計算するときは結果を, myEigen_Dichotomyにわたす
A <- cov (iris[, -5])
print (myEigen.Householder (A))

ハウスホルダー変換＋二分法による固有値計算(修正あり)

# 二分法による実固有値計算　『計算による線形代数』
# Aは三重対角化ずみのもの
# ある固有値における固有方程式の符号の変化回数を返す関数
# lambda を引数にする
computeSignChanges <- function (lambda, A) {
    nr     <- nrow (A)
    phi    <- numeric (nr + 1)
    phi[1] <- 1
    phi[2] <- lambda - A[1, 1]
    for (i in 2:nr) {
        # phiの漸化式
        phi[i+1] <- (lambda - A[i,i]) * phi[i] - A[i, i - 1] ** 2 * phi[i-1]
    }
    phi <- rev (phi)
    # phiの要素で符号が変わっている回数を数える
    # 掛け算して, 符号が負のものを数える
    phi.sign <- sign (phi)
    N        <- sum ((phi.sign [-(nr+1)] * phi.sign[-1]) < 0)
    
    return (N)
}
# 本体
# 固有値を計算するにはヤコビ法よりこれがいいらしい. 
myEigen.Dichotomy <- function (A,
                               lower = NULL, # 実対称のときは0を指定した方がいい
                               upper = NULL, # 大きい行列のときはそれなりに大きく
                               epsilon = 1e-15) {
    if (!exists ("myEigen.Householder")) {
        source ("myEigen_Householder.r")
    }
    if (is.null (lower)) {
        lower <- -100
    }
    if (is.null (upper)) {
        upper <- 100
    }
    nr      <- nrow (A)
    LAMBDA  <- numeric (nr)
    t.lower <- lower
    t.upper <- upper 
    for (k in 1:nr){
        gain    <- TRUE
        while (gain) {
            c   <- (t.upper + t.lower) * .5
            val <- computeSignChanges (c, A)
            if (k <= val) {
                t.lower <- c
            } else {
                t.upper <- c
            }
            gain <- abs (t.lower - t.upper) > epsilon
        } 
        LAMBDA[k] <- c
        t.lower <- lower # 下限はもとに戻す
        t.upper <- c     # 大きい方から求めているので, 上限は狭めていく
    }
    
    return (list (values = LAMBDA))
}

# 動作確認
A <- cov (iris[, -5])
print (myEigen.Dichotomy (myEigen.Householder (A)))
print (eigen (A)$values)

コレスキー分解

# *********************************************************************************************
# コレスキー分解
# 実対称行列が引数
myCholeskyDecomposition <- function (A) {
    if (!is.matrix (A)) {
        warning ("出直して来い！")
    } else if (nrow (A) != ncol (A)) {
        warning ("出直して来い！")
    }
    nr <- nrow (A)
    L  <- matrix (0, nr, nr)
    # 一列の処理
    L[1, 1]    <- sqrt (A[1, 1])
    L[2:nr, 1] <- A[2:nr, 1] / L[1, 1]
    # 二列目以降の処理
    for (i in 2:(nr-1)) {
        # 対角成分
        L[i, i] <- sqrt (A[i, i] - crossprod (L[i, 1:(i-1)]))
        # 非対角成分
        for (j in (i+1):nr) {
            L[j, i] <- (A[j, i] - crossprod (L[j, 1:(i-1)], L[i, 1:(i-1)]))　/ L[i, i]
        }
    }
    #最後列の処理
    L[nr, nr] <- sqrt (A[nr, nr] - crossprod (L[nr, 1:(nr-1)]))
    dimnames (L) <- dimnames (A)
    return (list (L = L))
}

# 動作確認
A   <- cov (iris[,-5])
rtn <- myCholeskyDecomposition (A)
print (rtn)
tcrossprod (rtn$L, rtn$L)
chol (A)

ハウスホルダー変換に基づくQR分解　（修正あり）

# ハウスホルダー変換によるQR分解
# http://www.riken.jp/brict/Ijiri/study/QRfactorization.htm
myQRdecompositionHouseholder <- function (A, digits = 10) {
    # ブロック行列用
    library (Matrix)
    
    nr   <- nrow (A)
    nc   <- ncol (A)

    #R
    R <- A
    # Q
    Q <- diag (nr)
    
    # loop
    for (i in 1:(nc-1)) {  # 修正 2013/11/12
        x       <- R [i:nr, i, drop = TRUE]
        s       <- sqrt (c (crossprod (x)))
        y       <- numeric (nr - i + 1)
        y[1]    <-  - sign (x[1]) * s 
        u       <- x - y 
        u       <- u / c (sqrt (crossprod (u)))
        h       <- diag (nr - i + 1) - 2 * tcrossprod (u)
        H       <- as.matrix (bdiag(Diagonal(i - 1), h))
        print (H)
        R       <- crossprod (t (H), R)
        Q       <- crossprod (t (H), Q)
    }
    
    return (list (Q = round (t (Q), digits), R = round (R, digits)))
}

# 動作確認
print (t <- myQRdecompositionHouseholder (cov (iris[,-5])))
print (crossprod (t (t$Q) , t$R))
print (cov (iris[,-5]))
print (qr.Q(qr(cov(iris[,-5]))))
print (qr.R(qr(cov(iris[,-5]))))

ムーアペンローズの逆行列（ペンローズの収束計算にもづく。n=5くらいまで)

# 一般逆行列
# ペンローズの収束計算による方法
# この方法では行列が大きくなるとすぐに発散する
# パッケージではMASSのginvでは特異値分解にもとづいている(そのうち実装する)
myInverseMatrix.MoorePenrose <- function (A, epsilon = 1e-5) {
    nr <- nrow (A)
    nc <- ncol (A)
    if (nr < nc) {
        A <- t (A)
    }
    N    <- ncol (A)
    B    <- crossprod (A)
    C    <- diag (N)
    C.p  <- C
    gain <- TRUE
    k    <- 0
    while (gain) {
        C    <- C.p
        k    <- k + 1
        U    <- crossprod (t (C), B)
        C.p  <- sum (diag (U)) * diag (N) / k - U
        gain <- max (abs (crossprod (t (C.p), B))) > epsilon
        #cat (max (abs (crossprod (t (C.p), B))), "\n")
    }
    A.ginv <- k * tcrossprod (C, A) / sum (diag (U))
    if (nr < nc) {
        A.ginv <- t (A.ginv)
    }
    return (A.ginv)
}

# 動作確認
A <- matrix (c (1, 2, 
                -2, -4),
             2, 2, byrow = TRUE)
print (myInverseMatrix.MoorePenrose (A))
print (ginv (A))

# 動作確認2
# N = 5までしかちゃんと計算できない...
N <- 5
B <- matrix (rnorm (N * N), N, N)
print (myInverseMatrix.MoorePenrose (B))
print (ginv (B))

共役勾配による連立一次方程式の解

# 共役勾配法による連立一次方程式の解
# Aは実対称
# Aが非対称のときはt(A) A x = t(A) bの解を求める(つまり, t(A)Aを代入する)
myConjugateGradient <- function (A, b, epsilon = 1e-8) {
    nr    <- nrow (A)
    val   <- rnorm (nr)
    r.old <- b - crossprod (t (A), val)
    p     <- r.old
    gain  <- TRUE
    
    while (gain) {
        vec1  <- crossprod (t (A), p)
        scal1 <- c (crossprod (p, vec1))                 # スカラー
        alpha <- c (crossprod (p, r.old) / (scal1))      # スカラー
        val   <- val + alpha * p
        r.new <- r.old - alpha * vec1
        
        # 収束判定
        # 残差の最大が変化で決める?
        gain  <- max (abs (r.new - r.old)) > epsilon
        
        beta  <- - c(crossprod (r.new, vec1) / (scal1)) # スカラー
        p     <- r.new + beta * p
        r.old <- r.new
    }
    
    return (val)
}

# 動作確認
A <- matrix (c (4, 1, 0, -2,
                1, 4, -1, 1,
                0, -1, 4, -1,
                -2, 1, -1, 4), 
             4, 4, byrow = TRUE)
b <- c(5, -5, 6, -8)
print (solve (A, b)) # [1]  1 -1  1 -1
print (myConjugateGradient (A, b))

SOR法による連立一次方程式の解

# SOR法
# ガウスサイデル法の加速 omega = 1のとき, ガウスサイデルと同じ
mySOR <- function (A, b, omega = 1, val = NULL, epsilon = 1e-15) {
    nr   <- nrow (A)
    d    <- diag (A)
    D    <- b / d # 注意ベクター!!
    B    <- sweep (A, 1, d, "/"); diag (B) <- 1
    L    <-  B; L[upper.tri (B)] <- 0
    U    <- -B; U[lower.tri (B, diag = TRUE)] <- 0
    gain <- TRUE
    if (is.null (val)) {
        val <- numeric (nr)
    }
    if (!exists("myForwardSolve")) {
        source ("myForwardSolve.r")
    }
    while (gain) {
        y       <- D + crossprod (t (U), val)
        val.tmp <- myForwardSolve (cbind (L, y))       # ガウスサイデル時の更新量
        val.new <- omega * val.tmp + (1 - omega) * val # 一つ前の解との重ね合わせ
        gain　　<- max (abs (val.new - val)) > epsilon
        val     <- val.new
    }
    return (val)
}

# 動作確認1
A <- matrix (c (4, 1, 0, -2,
                1, 4, -1, 1,
                0, -1, 4, -1,
                -2, 1, -1, 4), 
             4, 4, byrow = TRUE)
b <- c(5, -5, 6, -8)
print (solve (A, b)) # [1]  1 -1  1 -1
print (mySOR (A = A, b = b, omega = 1.5))

# 動作確認2
A <- matrix (c (8, 2, 1, 1,
                1, 6, -2, 2, 
                3, 3, -7, 1,
                1, 1, 1, 5),
             4, 4, byrow = TRUE)
b <- c(-15, -6, -14, 8)
x <- c(-2.5, -1.5, 1.5, 2.7)
print (solve (A, b))
print (mySOR (A = A, b = b, val = x))

ラグランジェ補間

#****************************************
#Lagrange補間多項式
#****************************************
myLagrangePolyInterporation <- function (x, X, Y) {
    #補間多項式の要素
    lag <- function(i, x)  prod(x - X[-i]) / prod(X[i] - X[-i]) * Y[i]
    
    #補間多項式
    Lag <- function(x) sum(sapply(1:length(X), lag, x))
    
    #結果
    y <- sapply(x, Lag)
    
    return (y)
    
}
# 動作確認確認
#データ点
f  <- function (X) {
    1 / (1 + 25 * X ** 2)
}
X  <- seq (-1, 1, .1)
Y  <- f(X)
x  <- seq (-1, 1, .05)
y  <- myLagrangePolyInterporation (x, X, Y)
#グラフづくり
def.par <- par(no.readonly = TRUE) 
par(mai = c(1.5, 1.5, 0.5,0.5))
curve(f, -1, 1, col = "#f39800", lwd = 3,　asp = 1, ylim = c(0, 1.2), 
      bg = "white", cex = 1.3, cex.lab = 1.5, cex.axis = 1.3)
points(X, Y, pch = 19, col = "#0068b7", cex = 1.3)
points(x, y, type = "l", lwd = 1.5)
grid()
legend ("topright", lty = c(-1, 1, 1), legend = c("サンプル点", "正解", "ラグランジェ補間"), 
        col = c("#0068b7", "#f39800", 1), pch = c(19, -1, -1), lwd = 2, pt.cex = 2,  cex = 1.5, 
        bty = "n")
par(def.par)

エルミート補間

#***********************************************
# Lagrange補間多項式の拡張のエルミート補間多項式
# 一階微分の値も与えられたときの話
# ちゃんと計算できている自信ない. 
#************************************************
myHermiteInterporation <- function (x, X, Y1, Y2) {
    dlen <- length (X)
    # X, Y1, Y2がデータ点
    # ひとつのデータのエルミートを計算する関数を用意してそれをsapply
    # エルミート多項式の要素
    myHermite1 <- function (i, x) {
        lag2 <- prod ((x - X[-i]) / (X[i] - X[-i])) ** 2
        lagp <- sum (1 / (X[i] - X[-i]))
        h <- lag2 * (1 - 2 * (x - X[i]) * lagp)
        g <- (x - X[i]) * lag2
        return (sum (Y1[i] * h + Y2[i] * g))
    }
    # 足し合わせる
    myHermite2 <- function (x) {
        sum (sapply (1:dlen, myHermite1, x))
    }    
    # 引数のデータ点すべてで上の計算をしてみる
    ans <- sapply (x, myHermite2)  
    return (ans)
}
# 動作確認確認
# データ点
f1  <- function (X) { # 補間したい関数
    1 / (1 + 25 * X ** 2)
}
f2  <- function (X) { # 一回微分
     - 50 * X / ((1 + 25 * X **2) ** 2)
}
X  <- seq (-1, 1, .1)
Y1 <- f1(X)
Y2 <- f2(X)
x  <- seq (-1, 1, .05)
y  <- myHermiteInterporation (x = x, X = X, Y1 = Y1, Y2 = Y2)
# グラフ
def.par <- par(no.readonly = TRUE) 
par(mai = c(1.5, 1.5, 0.5,0.5))
curve(f1, -1, 1, col = "#f39800", lwd = 3,　asp = 1, ylim = c(0, 1.2), 
      bg = "white", cex = 1.3, cex.lab = 1.5, cex.axis = 1.3)
points(X, Y1, pch = 19, col = "#0068b7", cex = 1.3)
points(x, y, type = "l", lwd = 1.5)
grid()
legend ("topright", lty = c(-1, 1, 1), legend = c("サンプル点", "正解", "エルミート補間"), 
        col = c("#0068b7", "#f39800", 1), pch = c(19, -1, -1), lwd = 2, pt.cex = 2,  cex = 1.5, 
        bty = "n")
par(def.par)

スプライン補間

# ３次スプライン補間
# alpha = 0, beta = 0は自然スプラインって呼ばれる
mySplineInterporation <- function (x, X, Y, alpha = 0, beta = 0) {
    # 一応自前のヤコビ法を使うよ
    if (!exists ("myJacobiIteration")) {
        source ("myJacobiIteration.r")
    }
    h  <- X[2] - X[1] # 分点は等間隔に与える
    # ２回微分スプライン係数を計算する関数
    mySpline2d <- function (Y = Y, alpha = alpha, beta = beta, h = h) {
        h2 <- h ** 2
        nr <- length (Y) # n + 1 点
        # 連立方程式の係数行列A
        # n - 1次元
        A <- diag (nr - 2) * 4
        for (i in 1:(nr - 3)) {
            A[i, i + 1] <- 1
            A[i + 1, i] <- 1
        }
        # 連立方程式の右辺
        b      <- Y[-c(nr - 1, nr)] - 2 * Y[-c(1, nr)] + Y[-c(1, 2)]
        b [1]  <- b[1] - h2 * alpha / 6
        b [nr - 2] <- b[nr - 2] - h2 * beta / 6
        b <- b * 6 / (h2)
    
        # 連立方程式を解く
        return (myJacobiIteration (A, b))
    }
    # データ点におけるsの二階微分の値
    # alphaとbetaは端点の値で最初に決めておく
    spline2d <- c(alpha, mySpline2d (Y, alpha, beta, h), beta)
    
    # データ点におけるsの一階微分の関数
    mySpline1d <- function (spline2d = spline2d, Y = Y, X = X, h = h) {
        nr       <- length (Y) 
        deltaX   <- X[-1] - X[-nr]
        deltaX2  <- deltaX ** 2
        deltaY   <- Y[-1] - Y[-nr]
        deltaS   <- spline2d[-1] - spline2d[-nr]
        s        <- spline2d [-nr]
        
        rtn <- (deltaY - s * .5 * deltaX2 - deltaS * deltaX2 / 6) / deltaX
        return (rtn)
    } 
    # データ点におけるsの一階微分値
    spline1d <- mySpline1d (spline2d, Y, X, h)
    
    
    # 区間kの点xのスプライン値を計算する関数
    # ちょっと効率悪いか・・・
    mySpline <- function (x, k, X, Y, spline1d, spline2d, h) {
        ans <- Y[k] + spline1d[k] * (x - X[k]) + spline2d[k] * .5 * (x - X[k]) ** 2 + (spline2d[k + 1] - spline2d[k]) * (x - X[k]) ** 3 / 6 / h
        return (ans)
    }
    dlen  <- length (x)
    class <- c (cut (x, breaks = X, include.lowest = TRUE))
    y     <- numeric (dlen)
    for (i in seq_len (dlen)) {
        y [i] <- mySpline (x = x[i], k = class[i], X = X, Y = Y, spline1d = spline1d, spline2d = spline2d, h = h)
    }
    
    return (y)
    
}

# いつもの関数で動作確認
f  <- function (X) {
    1 / (1 + 25 * X ** 2)
}
X  <- seq (-1, 1, .1)
Y  <- f(X)
x  <- seq (-1, 1, .05)
y  <- mySplineInterporation (x = x, X = X, Y = Y)
#グラフづくり
def.par <- par(no.readonly = TRUE) 
par(mai = c(1.5, 1.5, 0.5,0.5))
curve(f, -1, 1, col = "#f39800", lwd = 3, asp = 1, ylim = c(0, 1.2), 
      bg = "white", cex = 1.3, cex.lab = 1.5, cex.axis = 1.3)
points(X, Y, pch = 19, col = "#0068b7", cex = 1.3)
points(x, y, type = "l", lwd = 1.5)
grid()
legend ("topright", lty = c(-1, 1, 1), legend = c("サンプル点", "正解", "スプライン補間"), 
        col = c("#0068b7", "#f39800", 1), pch = c(19, -1, -1), lwd = 2, pt.cex = 2,  cex = 1.5, 
        bty = "n")
box()
par(def.par)

Fuzzy-CMeans

#*********************************************************************************
# Fuzzy c-means (正解集合保有時のエントロピー付き)
# Rで学ぶクラスタ解析
# 正解集合がないもに使うときはエントロピーを適当にはずして
fuzzyCmeans <- function (data, m = 2, K = 2, trueClass, cumEntro = FALSE) 
{
    # data  : 行べクトルのマトリックス
    # m     : ファジー具合(mは1より大きい値)
    # K     : 分割するクラス数
    if(m<=1) {
        cat("出直してきな！！\n")
        return (0)
    }
    
    if (!is.matrix(data)) {
        cat("出直してきな！！\n")
        return (0)
    }  
    
    N <- nrow (data) # データ数
    p <- ncol (data) # データの次元数
    
    
    
    # クラスごとの重心の初期値
    C <- matrix(rnorm(K * p), nrow = K, ncol = p) #クラス×次元
    
    # 準備
    gain <- FALSE
    n    <- 2
    W    <- matrix (0, N, K)　# データス×クラス
    ans  <- matrix(0, 1e5, N); ans[1, ] <- sample(1:K, N, replace = TRUE)
    anG  <- array(0, c(K, p, 1e5)); anG[,,1] <- C
    
    # エントロピーを求める関数
    # ちょっとややこしく書いているかもだけど正しく計算はできているはず..
    # LとKが異なると計算できないのかな？まあ, 前提はKは既知だけど
    if (cumEntro) {
        myEntropy <- function (true, ans) {
            true <- factor (true, levels = 1:K)
            ans  <- factor (ans, levels = 1:K)
            T     <- as.matrix(table (true, ans))
            probT <- sweep (T, 1, rowSums(T), "/")
            E   <- numeric(K)
            for (k in seq_len (K)) {
                tmp <- probT[k, , drop = TRUE]; tmp <- tmp [tmp > 0]
                if(length (tmp) == 0) {
                    E[k] <- 0
                } else {
                    E[k] <- - sum(tmp * log(tmp))
                }
            }
            rtn <- sum(rowSums(T) * E) / N 
            return (rtn)
        }
        entropy <- numeric(1e5)
        entropy[1] <- myEntropy (trueClass, ans[1, , drop = TRUE])
    }
    
    
    
    # 収束計算
    while (!gain) {
        for (k in seq_len (K)) {
            W[,k] <- rowSums (sweep(data, 2, C[k,], "-") ** 2)
        }
        
        # 重みの更新
        weight <- W ** (2 / (m - 1))
        weight <- 1 / sweep (weight, 1, rowSums(1 / weight), "*")
        
        # 重心の更新
        gm   <- weight ** m　　# N, クラス
        rgm  <- colSums (gm)   # クラス
        ugm  <- t(gm) %*% data # クラス　, 次元
        Cnew <- sweep (ugm, 1, rgm, "/")# N 次元
        
        # あと処理
        C    <- Cnew
        ans[n, ] <- apply (weight, 1, which.max)
        anG[, , n] <- C
        if (cumEntro){
            entropy[n] <- myEntropy(trueClass, ans[n,])
        }
        gain <- all(ans[n, ] == ans[n-1, ])
        
        n <- n + 1
    }
    ans <- ans[1:(n-1), ]
    anG <- anG[,,1:(n-1)]
    if (cumEntro) {
        entropy <- entropy[1:(n-1)]
    } else {
        entropy <- NA
    }
    
    www <- apply (weight, 1, max)
    return (list(class = ans, iter = n, cood = anG, en = entropy, w = www))
    
}

# 動作確認
library(mvtnorm)
x <- rbind(rmvnorm(100, mean = c(0, 0), sigma = diag(2)), rmvnorm (100, mean = c(2, 2), sigma = diag(2)))
p <- fuzzyCmeans(x, m = 2, K = 2, trueClass = rep(1:2, each = 100), cumEntro = TRUE) 
# trueclassとKの数を一致させないとエントロピーは計算できないよ
# 分割の結果
# col
nr <- nrow (p$class)
t1 <- p$class[nr, ] == 1
t2 <- p$class[nr, ] == 2
t3 <- p$class[nr, ] == 3
plot (x, col = p$class[5,] + 1, pch = 19, type = "n")
points (x[t1, ], bg = rgb (1, 0, 0, alpha = p$w - .2), pch = 21, col = NULL, cex = 1.3)
points (x[t2, ], bg = rgb (0, 1, 0, alpha = p$w - .2), pch = 21, col = NULL, cex = 1.3)
points (x[t3, ], bg = rgb (0, 0, 1, alpha = p$w - .2), pch = 21, col = NULL, cex = 1.3)
points (p$cood[, ,5], pch = 19, cex = 2)

はさみうち法（ニュートン法の近似)による1変数方程式の解

# はさみ撃ち法による方程式の解
# 二分法のように中点ではなくて, 一次方程式の解を探索点とする
myPincerMethod <- function (FUN, left, right, epsilon = 1e-12) {
    val_left  <- FUN (left)
    val_right <- FUN (right)
    gain      <- TRUE
    while (gain) {
        c    <- left - (right - left) * val_left / (val_right - val_left)
        val  <- FUN (c)
        sign <- val_left * val
        if (sign > 0) {
            left     <- c
            val_left <- val
        } else {
            right     <- c
            val_right <- val
        }
        gain <- abs (val) > epsilon
    }
    
    return (list (x = c, val = val))
}
# 動作確認
f <- function (x) 5 * x ** 3 - 7 * x ** 2 + x - 9
a <- 0
b <- 3
print (myPincerMethod (f, a, b))

二分割法による1変数方程式の解

# 二分法による方程式の解
myDichotomy <- function (FUN, left, right, epsilon = 1e-7) {
    #x in (left, right) で方程式の根をみつける
    val_left  <- FUN (left)
    val_right <- FUN (right) 
    gain <- TRUE
    while (gain) {
        c    <- .5 * (left + right)
        val  <- FUN (c)
        sign <- sign (val * val_left)
        if (sign > 0) {
            left <- c
            val_left <- val
        } else {
            right <- c
            val_right <- val
        }
        gain <- abs (val) > epsilon
    }
    
    return (list (x = c, val = val))
}
# 動作確認
f <- function (x) 5 * x ** 3 - 7 * x ** 2 + x - 9
a <- 0
b <- 3
print (myDichotomy (f, a, b))

線形識別関数

# はじパタ75
# 線形識別関数
myLinearDiscriminateFun <- function (X, C) {
    library (Matrix) # ダミー変数づくり用
    # X は行ベクトルのデータ
    # C は正解クラス 
    X       <- as.matrix (X)
    C.dummy <- t (as.matrix (as (C, "sparseMatrix") ))
    W.hat   <- solve (crossprod (X, X), crossprod (X, C.dummy))
    C.hat   <- t (crossprod (W.hat, t (X)))
    ans     <- max.col (C.hat)
    
    classes <- unique (C)
    return (list (ans = classes[ans], conf = C.hat, w = W.hat))
    
}

# 動作確認
myLinearDiscriminateFun (iris[, 1:2], iris[, 5])

線形判別分析

# はじめてのパターン認識　4章
# 線形判別分析になる
my2DGetNormalDiscriminantLinear<- function (dat, class.col = NULL) {
    if (is.null (class.col)) {
        class.col <- ncol (dat)
    }
    weight    <- table (dat[,3]) / nrow (dat)
    cols      <- ncol (dat)
    col.index <- seq_len (cols)
    classes   <- length (unique (dat [, class.col]))
    mean.vectors    <- by (dat[, -class.col], dat[, class.col], colMeans)
    covariances     <- by (dat[, -class.col], dat[, class.col], cov) 
    covariance.pool <- weight [1] * covariances [[1]] + weight[2] * covariances [[2]]
    inv.covariances <- solve (covariance.pool)
    det.covariances <- det (inv.covariances)
    
    c1 <- crossprod (mean.vectors[[1]], inv.covariances)
    c2 <- crossprod (mean.vectors[[2]], inv.covariances)
    C  <- matrix (2 * (c2 - c1), ncol = 1)
    
    F1 <- 0
    F2 <- crossprod (t (c1), mean.vectors[[1]])
    F3 <- crossprod (t (c2), mean.vectors[[2]])
    Ft <- F2 - F3 + F1
    
    # N * N 点で等高線を書く
    N <- 100
    x <- seq (min (dat[,1]), max (dat[,1]), length.out = N)
    y <- seq (min (dat[,2]), max (dat[,2]), length.out = N)
    X <- expand.grid (x, y)
    z <- c(crossprod (C, t (X))) + Ft
    z <- matrix (z, N, N)
    
    
    # outerが動かないのなんでなの？
    plot (dat[,-3], pch = c(1, 2)[c(dat[,3])], cex = 1.2, col = c("blue2", "red2")[c(dat[,3])], tcl = .5, las = 1)
    contour (x, y, z, add = TRUE, col = "darkgrey", nlevels = 10)
    contour (x, y, z, add = TRUE, col = 1, nlevels = 1)
    
    ans <- c(crossprod (C, t(dat[, -3]))) + Ft
    return (list (ans = ans))
}

# 動作確認
data (Pima.tr)
dat <- Pima.tr [, c("glu","bmi", "type")]
print (my2DGetNormalDiscriminantLinear (dat))

２次正規分布による判別分析

# はじめてのパターン認識 4章
# 二次元正規分布モデルによる判別分析をする関数
# 共分散分析を同じ値にすれば線形判別分析になる
my2DGetNormalDiscriminantAnalysis <- function (dat, class.col = NULL) {
    if (is.null (class.col)) {
        class.col <- ncol (dat)
    }
    cols      <- ncol (dat)
    col.index <- seq_len (cols)
    classes   <- length (unique (dat [, class.col]))
    mean.vectors    <- by (dat[, -class.col], dat[, class.col], colMeans)
    covariances     <- by (dat[, -class.col], dat[, class.col], cov) 
    inv.covariances <- array (unlist (lapply (covariances, solve)), c (cols - 1, cols - 1, classes))
    det.covariances <- apply (inv.covariances, c(3), det)
    
    S  <- inv.covariances [, , 1] - inv.covariances [, ,2]
    c1 <- crossprod (mean.vectors[[1]], inv.covariances [, , 1])
    c2 <- crossprod (mean.vectors[[2]], inv.covariances [, , 2])
    C  <- matrix (2 * (c2 - c1), ncol = 1)
 
    F1 <- log (det.covariances[1]) - log (det.covariances[2])
    F2 <- crossprod (t (c1), mean.vectors[[1]])
    F3 <- crossprod (t (c2), mean.vectors[[2]])
    Ft <- F2 - F3 + F1

    # N * N 点で等高線を書く
    N <- 100
    x <- seq (min (dat[,1]), max (dat[,1]), length.out = N)
    y <- seq (min (dat[,2]), max (dat[,2]), length.out = N)
    X <- expand.grid (x, y)
    z <- diag (crossprod (t (X), crossprod (S, t (X)))) + c(crossprod (C, t (X))) + Ft
    z <- matrix (z, N, N)

    
    # outerが動かないのなんでなの？
    plot (dat[,-3], pch = c(1, 2)[c(dat[,3])], cex = 1.2, col = c("blue2", "red2")[c(dat[,3])], tcl = .5, las = 1)
    contour (x, y, z, add = TRUE, col = "darkgrey", nlevels = 10)
    contour (x, y, z, add = TRUE, col = 1, nlevels = 1)
    
    ans <- diag (crossprod (t(dat[,-3]), crossprod (S, t(dat[, -3])))) + c(crossprod (C, t(dat[, -3]))) + Ft
    return (list (ans = ans))
}

# 動作確認
data (Pima.tr)
dat <- Pima.tr [, c("glu","bmi", "type")]
print (my2DGetNormalDiscriminantAnalysis (dat))

ガウシアンカーネルによるヒストグラム

# カーネル密度推定法
# ガウスカーネルを使用
# 一次元って割り切ればもっと早くなる？ というかめっちゃ遅い..... 
# 統計的機械学習 12章, 13章(pp.161-187) オーム社
# 杉山将, 平成21年9月25
# 体積b^dはよくわからんけど、変数変換と考えていいはず
# |det(V)|で対角成分がすべて
# 正規化した正規分布の重ね合わせで表現する
myKernelDensityEstimation1d <- function (dat,# 標本
                                         band.width = 0.1, 
                                         upper = NULL, 
                                         lower = NULL,
                                         d = 1) {

    # 上限値と下限値が設定されていないときは
    dat.sd <- sd (dat)
    if (is.null (upper)) {
        upper  <- max (dat) +  dat.sd
    } 
    if (is.null (lower)) {
        lower  <- min (dat) -  dat.sd
    }
    n           <- length (dat)　    　　　　　# データ数
    Vr          <- 1 / (n * (band.width ** d)) # band.width ** dは正規化するときの変数変換の際に出てくるイメージ
    #dr          <- 1 / ((2 * pi) ** (d / 2))　
    
    # 点xにおける密度を求める関数
    if (d == 1) {
        myGKD <- function (x, dat, band.width) {
            y <- (x - dat) / band.width
            sum (dnorm (y))
        }
        x       <- seq (lower, upper, length.out = 512)
        probden <- sapply (x, myGKD, dat = dat, band.width = band.width)
    } else {
        warnings ("ごめんなさい多次元のときは未完成")
    }
    
    return (list (den = probden * Vr , 
                  x = x, band.width = band.width,
                  N = n))
}

# 動作確認
# upperとlowerをあわせられればもっとdensityとあう？
dat <- rnorm (100)
rtn <- myKernelDensityEstimation1d (dat = dat, band.width = .35)
plot(density(dat), col = "orange", type = "l", lwd = 2)
points(rtn$x, rtn$den, type = "l", lwd = 2)
legend("topright", lty = 1, lwd = 3, cex = 1.5, legend = c("density", "GaussKernel"), col = c("orange", 1), bty = "n")

尤度交差検証によるバンド幅の設定

# ガウスカーネル密度のバンド幅の検証
# できているかよくわらんけど, それらしい値にいってるのか？
# もしかして局所解？
myLikelihoodCrossValidationBand <- function (dat, 
                                             band.width = NULL, 
                                             upper = NULL, 
                                             lower = NULL) {
    dsd <- sd (dat)
    if (is.null (upper)) {
        upper <- max (dat) + dsd
    }
    if (is.null (lower)) {
        lower <- min (dat) - dsd
    }
    
    if (is.null (band.width)) {
        band.width <- (upper - lower) / 100 # 十分に小さい値から始める
    }
    myGKD <- function (x, dat, band.width) {
        y <- (x - dat) / band.width
        sum (dnorm (y))
    }

    # band.widthはざっくり値を代入してね
    n     <- length (dat)

    # バンド幅を与えたときに平均対数尤度を返す関数
    computeMeanLL <- function (dat, b) {
        n     <- length (dat)
        class <- sample (n %/% 10, n, replace = TRUE)
        B     <- unique (class)
        # クラスiのときの
        for (i in B) {
            id.class.k  <- which (class == i)
            nr          <- length (id.class.k)
            dat.class.k <- dat [id.class.k]
            likelihood  <- numeric(nr)
            # データjのときの
            for (j in 1:nr) {
                small.dat.class.k <- dat.class.k [-j]
                likelihood[j] <-  myGKD (x = dat.class.k[j], dat = dat.class.k, band.width =b)
            }
        }
        mean.loglikelihood <- mean (log (likelihood))
        return (mean.loglikelihood)
    }
    
    # 
    gain <- TRUE
    h    <- .01
    sign.lll <- 1
    while (gain) {
       h <- sign.lll * abs (h)
       b.old    <- band.width
       lll.old  <- computeMeanLL (dat = dat, b.old)
       b.can    <- band.width + h
       lll.can  <- computeMeanLL (dat = dat, b.can) 
       sign.lll <- sign (lll.can - lll.old)
      
       # バンド幅を増やす
       if (sign.lll > 0) {
           while (sign.lll > 0) {
               h <- h * 2
               b.old    <- b.can
               lll.old  <- computeMeanLL (dat = dat, b.old)
               b.can    <- b.can + h
               lll.can  <- computeMeanLL (dat = dat, b.can) 
               sign.lll <- sign (lll.can - lll.old)
           }
           band.width <- b.old; h <- h/2
   
       } else {
           # バンド幅を減らしていｒク
           while (sign.lll < 0) {
               h <- h / 2
               b.can    <- b.can - h
               lll.can  <- computeMeanLL (dat = dat, b.can) 
               sign.lll <- sign (lll.can - lll.old)

           }
           band.width <- b.can; h <- h * 2       
       }        
       gain     <- abs (lll.old - lll.can) > 1e-10
    }
    
    return (band.width)
}
dat <- rnorm (1000)
print (myLikelihoodCrossValidationBand (dat = dat))

K近傍法によるヒストグラム

# k-最近傍密度推定法
# 標本数kを固定して体積を変えるもの
# kernelとパー全はkを変える
# k = 1のときは特に最近傍密度推定法
# 取りあえず一次元
# ちょっと微妙？？？？
myKNearestNeighborDensity1d <- function (dat, 
                                         k = NULL,
                                         upper = NULL, 
                                         lower = NULL, 
                                         d = 1) {
    ga   <- gamma (1 + d/2)
    n    <- length (dat)
    pi2d <- pi ** (d / 2) 
    if (is.null (k)) {
        # kの基本設定はsqrt (n)
        # より大きくするとなめらかになる
        k <- sqrt (n) 
    }
    dat.sd <- sd (dat)
    if (is.null (upper)) {
        upper <- max (dat) + dat.sd
    }
    if (is.null (lower)) {
        lower <- min (dat) - dat.sd
    }
    # rを決める関数
    getR <- function (x, dat = dat, k = k, r.upper = r.upper) {
        rM1   <- rM2 <- r.upper
        h     <- .5 * r.upper
        gain  <- TRUE
        while (gain) {
            c    <- sum (abs (x - dat) <= rM1) # xからの距離がrM1以下のデータ数
            rM1  <- rM1 - sign (c - k) * h
            h    <- h * .5　                   # 変化量は倍々で小さくする
            gain <- abs (rM1 - rM2) > 1e-15
            rM2  <- rM1
        }
        r <- rM1
        return (r)
    }
    
    r.upper <- max (dat) + sd (dat) * 3
    x   <- seq (lower, upper , length.out = 512)
    r   <- sapply (x, getR, dat = dat, k = k, r.upper = r.upper) ** d
    den <- k * ga / r / pi2d / n
    
    return (list (den = den, x = x))
    
}
dat <- rnorm (1000)
rtn <- myKNearestNeighborDensity1d (dat = dat)
plot (rtn$x, rtn$den, type = "l")
points(density(dat), col = "orange", type = "l", lwd = 2)
legend ("topright", legend = c("density", "KNearest"), lty = 1, 
        lwd = 2, col = c("orange", 1), cex = 2, bty = "n")

パーゼンの窓によるヒストグラム

# パーゼンの窓
# カーネルよりもバンド幅は大きくした方が良い感じ
# 一次元
myParzeWindowDensity1d <- function (dat, 
                                    band.width, 
                                    upper = NULL, 
                                    lower = NULL,
                                    d     = 1) {
    n      <- length (dat)
    dat.sd <- sd (dat)
    if (is.null (upper)) {
        upper <- max (dat) + dat.sd
    }
    if (is.null (lower)) {
        lower <- min (dat) - dat.sd
    }
    myPWD <- function (x, dat = dat, band.width = band.width) {
        x <- (x - dat) / band.width
        k <- sum (abs (x) <= .5)
        return (k)
    }
    x   <- seq (lower, upper, length.out = 512)
    den <- sapply (x, myPWD, dat = dat, band.width = band.width) / n
    
    return (list (den = den, x = x, band.width = band.width, N = n))
    
}
dat <- rnorm (1000)
rtn <- myParzeWindowDensity1d (dat = dat, band.width = 1)
plot(density(dat), col = "orange", type = "l", lwd = 2)
points (rtn$x, rtn$den, type = "s")
legend ("topright", legend = c("density", "ParzeWindow"), lty = 1, 
        lwd = 2, col = c("orange", 1), cex = 2, bty = "n")