# ruff: noqa: E402
# ruff: noqa: E501

Fitting a polynomial with Gaussian priors#

We fit a simple polynomial with Gaussian priors, which is an example of a Gauss-linear problem for which the results obtained using Subspace Iterative Ensemble Smoother (SIES) tend to those obtained using Ensemble Smoother (ES). This notebook illustrated this property.

import itertools

import numpy as np
import pandas as pd

np.set_printoptions(suppress=True)
rng = np.random.default_rng(12345)

import matplotlib.pyplot as plt

COLORS = list(plt.rcParams["axes.prop_cycle"].by_key()["color"])

plt.rcParams["figure.figsize"] = (6, 6)
plt.rcParams.update({"font.size": 10})
from ipywidgets import interact  # noqa  # isort:skip
import ipywidgets as widgets  # noqa  # isort:skip
from p_tqdm import p_map

import iterative_ensemble_smoother as ies

Define synthetic truth and use it to create noisy observations#

ensemble_size = 200


def poly(a, b, c, x):
    return a * x**2 + b * x + c


# True patameter values
a_t = 0.5
b_t = 1.0
c_t = 3.0

noise_scale = 0.1
x_observations = [0, 2, 4, 6, 8]
observations = [
    (
        rng.normal(loc=1, scale=noise_scale) * poly(a_t, b_t, c_t, x),
        noise_scale * poly(a_t, b_t, c_t, x),
        x,
    )
    for x in x_observations
]

d = pd.DataFrame(observations, columns=["value", "sd", "x"])
d = d.set_index("x")
num_obs = d.shape[0]

fig, ax = plt.subplots(figsize=(7, 4))
x_plot = np.linspace(0, 10, 2**8)
ax.set_title("Truth and noisy observations")
ax.set_xlabel("Time step")
ax.set_ylabel("Response")
ax.plot(x_plot, poly(a_t, b_t, c_t, x_plot))
ax.plot(d.index.get_level_values("x"), d.value.values, "o")
ax.grid()
fig.tight_layout()
plt.show()
_images/5788a16eaa482c3be9b65fd3bc896c5a0dd1bf1f7993f5d294fc62576fe9b3de.png

Assume diagonal observation error covariance matrix and define perturbed observations#

R = np.diag(d.sd.values**2)

E = rng.multivariate_normal(mean=np.zeros(len(R)), cov=R, size=ensemble_size).T
assert E.shape == (num_obs, ensemble_size)

D = d.value.values.reshape(-1, 1) + E

Define Gaussian priors#

coeff_a = rng.standard_normal(size=ensemble_size)
coeff_b = rng.standard_normal(size=ensemble_size)
coeff_c = rng.standard_normal(size=ensemble_size)

X = np.vstack([coeff_a, coeff_b, coeff_c])

Run forward model in parallel#

fwd_runs = p_map(
    poly,
    coeff_a,
    coeff_b,
    coeff_c,
    [np.arange(max(x_observations) + 1)] * ensemble_size,
    desc="Running forward model.",
)

Pick responses where we have observations#

Y = np.array(
    [fwd_run[d.index.get_level_values("x").to_list()] for fwd_run in fwd_runs]
).T

assert Y.shape == (
    num_obs,
    ensemble_size,
), "Measured responses must be a matrix with dimensions (number of observations x number of realisations)"

Condition on observations to calculate posterior using both ES and SIES#

from iterative_ensemble_smoother.utils import steplength_exponential

X_ES_ert = X.copy()
Y_ES_ert = Y.copy()
smoother_es = ies.SIES(
    parameters=X_ES_ert,
    covariance=d.sd.values**2,
    observations=d.value.values,
    seed=42,
)
X_ES_ert = smoother_es.sies_iteration(Y_ES_ert, step_length=1.0)


X_IES_ert = X.copy()
Y_IES_ert = Y.copy()
smoother_ies = ies.SIES(
    parameters=X_IES_ert,
    covariance=d.sd.values**2,
    observations=d.value.values,
    seed=42,
)
n_ies_iter = 7
for i in range(n_ies_iter):
    step_length = steplength_exponential(i + 1)
    X_IES_ert = smoother_ies.sies_iteration(Y_IES_ert, step_length=step_length)

    _coeff_a, _coeff_b, _coeff_c = X_IES_ert

    _fwd_runs = p_map(
        poly,
        _coeff_a,
        _coeff_b,
        _coeff_c,
        [np.arange(max(x_observations) + 1)] * ensemble_size,
        desc=f"SIES ert iteration {i}",
    )

    Y_IES_ert = np.array(
        [fwd_run[d.index.get_level_values("x").to_list()] for fwd_run in _fwd_runs]
    ).T

Plots to compare results#

def plot_posterior(ax, posterior, method):
    for i, param in enumerate("abc"):
        ax[i].set_title(param)
        ax[i].hist(posterior[i, :], label=f"{method} posterior", alpha=0.5, bins="fd")
        ax[i].legend()

    fig.tight_layout()
    return ax


fig, ax = plt.subplots(nrows=3, figsize=(7, 8))

for i in range(3):
    ax[i].hist(X[i, :], label="prior", bins="fd")

ax[0].axvline(a_t, color="k", linestyle="--", label="truth")
ax[1].axvline(b_t, color="k", linestyle="--", label="truth")
ax[2].axvline(c_t, color="k", linestyle="--", label="truth")

plot_posterior(ax, X_ES_ert, method="ES ert")
_ = plot_posterior(ax, X_IES_ert, method=f"SIES ert ({n_ies_iter})")
_images/77f39869e811e17979e00d15c8d0f76f03b86c1fd4da3ff6efb350690dada95a.png
fig, axes = plt.subplots(1, 3, figsize=(8, 2.75))
axes = axes.ravel()
labels = "abc"
true_parameters = [a_t, b_t, c_t]

fig.suptitle(f"SIES ert ({n_ies_iter}) Posterior distribution")
for k, (i, j) in enumerate(itertools.combinations([0, 1, 2], 2)):
    axes[k].scatter(X[i, :], X[j, :], s=15, alpha=0.6)
    axes[k].scatter(X_ES_ert[i, :], X_ES_ert[j, :], s=15, alpha=0.2)
    axes[k].scatter(X_IES_ert[i, :], X_IES_ert[j, :], s=15, alpha=0.2)
    axes[k].scatter(
        [true_parameters[i]],
        [true_parameters[j]],
        color="black",
        s=100,
        label="True value",
    )
    axes[k].set_xlabel(labels[i])
    axes[k].set_ylabel(labels[j])


axes[k].legend()
fig.tight_layout()
plt.show()
_images/84abd5465111501f084c5acacd87eb4f8bc5ad81117057f834b2d2f817497379.png
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(8, 2.5), sharex=True, sharey=True)
x_plot = np.linspace(0, 10, 2**8)

# Plot the prior
ax1.set_title("Prior")
ax1.plot(x_plot, poly(a_t, b_t, c_t, x_plot), zorder=10, lw=4, color="black")
for parameter_prior in X.T:
    ax1.plot(
        x_plot, poly(*parameter_prior, x_plot), color=COLORS[0], alpha=0.1, zorder=5
    )

# Plot the posterior
ax2.set_title("ES ert posterior")
ax2.plot(x_plot, poly(a_t, b_t, c_t, x_plot), zorder=10, lw=4, color="black")
for parameter_posterior in X_ES_ert.T:
    ax2.plot(
        x_plot, poly(*parameter_posterior, x_plot), alpha=0.1, zorder=5, color=COLORS[1]
    )

# Plot the posterior
ax3.set_title(f"SIES ert ({n_ies_iter}) posterior")
ax3.plot(x_plot, poly(a_t, b_t, c_t, x_plot), zorder=10, lw=4, color="black")
for parameter_posterior in X_IES_ert.T:
    ax3.plot(
        x_plot, poly(*parameter_posterior, x_plot), alpha=0.1, zorder=5, color=COLORS[2]
    )

# Common axes setup
for ax in [ax1, ax2, ax3]:
    ax.set_ylim([0, 70])
    ax.set_xlabel("Time step")
    ax.set_ylabel("Response")
    ax.grid(zorder=0)

fig.tight_layout()
plt.show()
_images/b49b4edb29516d02a838fe4fb54d26728bd23fdc2277d3bc46b7d3cf90a4d6b0.png