PRML上巻 P5-6
昨日の続きをやっていく.
多項式の次数 M を選ぶ問題は,モデル比較(model comparison)あるいはモデル選択(model selection)と呼ばれる.
例として,次数 M = 0, 1, 3, 9 の多項式を当てはめてみる. 以下,コード(めちゃくちゃ汚いので後日修正する).
%matplotlib inline import matplotlib.pyplot as plt import numpy as np np.random.seed(42)
# t = sin(2 \pi x) x1 = np.linspace(0, 1, 100) t1 = np.sin(2 * np.pi * x1) # t = sin(2 \pi x) + gaussian noise x2 = np.linspace(0, 1, 10) t2 = np.sin(2 * np.pi * x2) + np.random.normal(0, 0.2, 10)
# figure 1.4


def _design_matrix(X, degree):
    """Polynomial design matrix [1, x, x^2, ..., x^degree].

    Assumes X[:, 0] is a column of ones (the bias term) and X[:, 1]
    holds the raw inputs x.
    """
    columns = [X[:, 0]] + [np.power(X[:, 1], j) for j in range(1, degree + 1)]
    return np.stack(columns, axis=1)


def gradientDescent(X, t, w, alpha, num_iters):
    """Performs batch gradient descent to learn `w`.

    Fits y(x, w) = sum_j w[j] * x**j to the targets `t` by minimising the
    mean squared error. The polynomial degree is inferred as len(w) - 1,
    so this single routine replaces the four hand-unrolled copies
    (gradientDescentM0/M1/M3/M9) that differed only in the degree.

    Args:
        X: (n, 2) array — first column ones, second column the inputs x.
        t: (n,) array of target values.
        w: initial weight vector, one entry per polynomial coefficient.
        alpha: learning rate.
        num_iters: number of gradient steps.

    Returns:
        The learned weights as a list [w_0, ..., w_M].
    """
    n = t.shape[0]
    Phi = _design_matrix(X, len(w) - 1)
    w = np.asarray(w, dtype=float).copy()
    for _ in range(num_iters):
        # All coefficients are updated simultaneously from the old `w`,
        # exactly as the original unrolled implementations did.
        w = w - (alpha / n) * (Phi.T @ (Phi @ w - t))
    return list(w)


# Backward-compatible names: the degree comes from len(w), so all four
# per-degree entry points are the same routine.
gradientDescentM0 = gradientDescent
gradientDescentM1 = gradientDescent
gradientDescentM3 = gradientDescent
gradientDescentM9 = gradientDescent


def predict(X, w):
    """Evaluate the fitted polynomial y(x, w) at the inputs in X[:, 1]."""
    return _design_matrix(X, len(w) - 1) @ np.asarray(w, dtype=float)


# Kept for compatibility with the earlier per-degree evaluation helpers.
y3 = predict
y9 = predict

# add w_0 and stack examples
X = np.stack([np.ones(x2.size), x2], axis=1)
y = t2
iterations = 100000
alpha = 0.1
w0 = gradientDescentM0(X, y, np.zeros(1), alpha, iterations)
w1 = gradientDescentM1(X, y, np.zeros(2), alpha, iterations)
w3 = gradientDescentM3(X, y, np.zeros(4), alpha, iterations)
w9 = gradientDescentM9(X, y, np.zeros(10), alpha, iterations)

fig, axs = plt.subplots(2, 2, figsize=(12, 8))
axs[0, 0].set_title('M = 0')
axs[0, 1].set_title('M = 1')
axs[1, 0].set_title('M = 3')
axs[1, 1].set_title('M = 9')
for ax in axs.flat:
    ax.plot(x1, t1, color='green')                          # true curve
    ax.scatter(x2, t2, facecolor='None', edgecolor='blue')  # noisy samples
    ax.set(xlabel='x', ylabel='t')

# Dense grid (with bias column) for drawing the fitted curves.
X_grid = np.stack([np.ones(x1.size), x1], axis=1)
axs[0, 0].plot(x1, predict(X_grid, w0), color='red')  # M = 0
axs[0, 1].plot(x1, predict(X_grid, w1), color='red')  # M = 1
axs[1, 0].plot(x1, predict(X_grid, w3), color='red')  # M = 3
axs[1, 1].plot(x1, predict(X_grid, w9), color='red')  # M = 9
plt.tight_layout()
M = 3 の図と M = 9 の図が書籍と異なり収束していないことがわかる. 最急降下法だと収束に時間がかかるので,別の方法を取ったほうが良さそうだ.
今日はここまで.