begin
using MortalityTables
using Turing
using UUIDs
using DataFramesMeta
using MCMCChains, Plots, StatsPlots
using LinearAlgebra
using PlutoUI; TableOfContents()
using Pipe
using StatisticalRethinking
using StatsFuns
end
The problem of interest is to look at mortality rates, which are given in terms of exposures (whether or not a life experienced a death in a given year).
We'll grab some example rates from an insurance table, which has a "selection" component: when someone enters observation, say at age 50, their mortality is path dependent (so someone who started being observed at 50 will have a different risk/mortality rate at age 55 than someone who started being observed at 45).
Additionally, there may be other groups of interest, such as:
high/medium/low risk classification
sex
group (e.g. company, data source, etc.)
type of insurance product offered
The example data will start with only the risk classification above
# Look up the 2001 VBT male nonsmoker select-and-ultimate table by name.
src = MortalityTables.table("2001 VBT Residual Standard Select and Ultimate - Male Nonsmoker, ANB")
MortalityTable (Insured Lives Mortality): Name: 2001 VBT Residual Standard Select and Ultimate - Male Nonsmoker, ANB Fields: (:select, :ultimate, :metadata) Provider: Society of Actuaries mort.SOA.org ID: 1118 mort.SOA.org link: https://mort.soa.org/ViewTable.aspx?&TableIdentity=1118 Description: 2001 Valuation Basic Table (VBT) Residual Standard Select and Ultimate Table - Male Nonsmoker. Basis: Age Nearest Birthday. Minimum Select Age: 0. Maximum Select Age: 99. Minimum Ultimate Age: 25. Maximum Ultimate Age: 120
# Select rates for a life issued at age 50, indexed by attained age.
src.select[50]
71-element OffsetArray(::Vector{Float64}, 50:120) with eltype Float64 with indices 50:120: 0.00104 0.00139 0.00177 0.00218 0.00261 0.00315 0.00384 ⋮ 0.75603 0.79988 0.84627 0.89536 0.94729 1.0
# Number of lives to simulate.
n = 10_000
10000
"""
    generate_data_individual(tbl, issue_age=rand(50:55), inforce_years=rand(1:30), risklevel=rand(1:3))

Simulate one life and return a vector of yearly exposure records.

For each policy year `t` in `1:inforce_years` the attained age is
`issue_age + t - 1`; the life dies in that year with probability
`tbl.select[issue_age][attained_age]` scaled by the hidden factor for `risklevel`.
Records stop at the first death, or after `inforce_years` years if the life survives.

# Arguments
- `tbl`: object whose `select[issue_age][attained_age]` yields a yearly mortality rate.
- `issue_age`: age at which observation starts.
- `inforce_years`: maximum number of years the life is observed.
- `risklevel`: index (1, 2 or 3) into the hidden risk multipliers `[0.7, 1.0, 1.5]`.

# Returns
A vector of named tuples `(issue_age, risklevel, att_age, death, id)`, one per
observed year, all sharing the same freshly generated `id`.
"""
function generate_data_individual(tbl,issue_age=rand(50:55),inforce_years=rand(1:30),risklevel=rand(1:3))
    # risk_factors will scale the "true" parameter up or down
    # we observe the assigned risklevel, but not risk_factor
    risk_factors = [0.7,1.0,1.5]
    rf = risk_factors[risklevel]

    # One rate per policy year, looked up at that year's attained age.
    # (Using a single scalar age here would apply the final year's rate to every year.)
    att_ages = issue_age .+ (1:inforce_years) .- 1
    select_rates = tbl.select[issue_age]
    rates = [select_rates[age] * rf for age in att_ages]
    deaths = rand(inforce_years) .< rates

    # Observation ends at the first death, otherwise at the end of the in-force period.
    first_death = findfirst(deaths)
    endpoint = isnothing(first_death) ? inforce_years : first_death

    id = uuid1()
    return map(1:endpoint) do i
        (
            issue_age = issue_age,
            risklevel = risklevel,
            att_age = issue_age + i - 1,
            death = deaths[i],
            id = id,
        )
    end
end
generate_data_individual (generic function with 4 methods)
# Simulate `n` lives and stack their yearly records into a single table.
# `reduce(vcat, ...)` concatenates without splatting thousands of vectors into one call.
exposures = reduce(vcat, [generate_data_individual(src) for _ in 1:n]) |> DataFrame
issue_age | risklevel | att_age | death | id | |
---|---|---|---|---|---|
1 | 53 | 3 | 53 | false | UUID("972cab6c-93c9-11ed-0ddb-db5fd1b2311d") |
2 | 53 | 3 | 54 | false | UUID("972cab6c-93c9-11ed-0ddb-db5fd1b2311d") |
3 | 53 | 3 | 55 | false | UUID("972cab6c-93c9-11ed-0ddb-db5fd1b2311d") |
4 | 55 | 2 | 55 | false | UUID("9750238a-93c9-11ed-1fd3-eb3978daaa6b") |
5 | 55 | 2 | 56 | false | UUID("9750238a-93c9-11ed-1fd3-eb3978daaa6b") |
6 | 55 | 2 | 57 | false | UUID("9750238a-93c9-11ed-1fd3-eb3978daaa6b") |
7 | 55 | 2 | 58 | false | UUID("9750238a-93c9-11ed-1fd3-eb3978daaa6b") |
8 | 55 | 2 | 59 | false | UUID("9750238a-93c9-11ed-1fd3-eb3978daaa6b") |
9 | 55 | 2 | 60 | false | UUID("9750238a-93c9-11ed-1fd3-eb3978daaa6b") |
10 | 55 | 2 | 61 | false | UUID("9750238a-93c9-11ed-1fd3-eb3978daaa6b") |
... | |||||
108849 | 52 | 3 | 53 | true | UUID("975c8ed6-93c9-11ed-1589-0dc00462a941") |
# Aggregate the yearly records per (issue age, attained age) cell:
# number of exposures, number of deaths, and the crude death fraction.
data = combine(groupby(exposures, [:issue_age, :att_age])) do cell
    n_exposed = nrow(cell)
    n_deaths = sum(cell.death)
    (exposures = n_exposed, deaths = n_deaths, fraction = n_deaths / n_exposed)
end
issue_age | att_age | exposures | deaths | fraction | |
---|---|---|---|---|---|
1 | 50 | 50 | 1691 | 37 | 0.0218805 |
2 | 50 | 51 | 1592 | 22 | 0.0138191 |
3 | 50 | 52 | 1524 | 31 | 0.0203412 |
4 | 50 | 53 | 1419 | 28 | 0.0197322 |
5 | 50 | 54 | 1329 | 31 | 0.0233258 |
6 | 50 | 55 | 1244 | 18 | 0.0144695 |
7 | 50 | 56 | 1174 | 14 | 0.011925 |
8 | 50 | 57 | 1104 | 19 | 0.0172101 |
9 | 50 | 58 | 1032 | 25 | 0.0242248 |
10 | 50 | 59 | 967 | 31 | 0.0320579 |
... | |||||
180 | 55 | 84 | 2 | 0 | 0.0 |
# Same aggregation as above, additionally split by risk level.
data2 = combine(groupby(exposures, [:issue_age, :att_age, :risklevel])) do cell
    n_exposed = nrow(cell)
    n_deaths = sum(cell.death)
    (exposures = n_exposed, deaths = n_deaths, fraction = n_deaths / n_exposed)
end
issue_age | att_age | risklevel | exposures | deaths | fraction | |
---|---|---|---|---|---|---|
1 | 50 | 50 | 1 | 575 | 5 | 0.00869565 |
2 | 50 | 50 | 2 | 560 | 11 | 0.0196429 |
3 | 50 | 50 | 3 | 556 | 21 | 0.0377698 |
4 | 50 | 51 | 1 | 548 | 7 | 0.0127737 |
5 | 50 | 51 | 2 | 533 | 7 | 0.0131332 |
6 | 50 | 51 | 3 | 511 | 8 | 0.0156556 |
7 | 50 | 52 | 1 | 522 | 7 | 0.01341 |
8 | 50 | 52 | 2 | 516 | 9 | 0.0174419 |
9 | 50 | 52 | 3 | 486 | 15 | 0.0308642 |
10 | 50 | 53 | 1 | 485 | 2 | 0.00412371 |
... | ||||||
531 | 55 | 84 | 1 | 2 | 0 | 0.0 |
Estimate $p$, the average mortality rate, not accounting for any variation within the population/sample:
# Single-parameter model: every cell shares one death probability `p` with a
# Beta(1,1) prior, and the deaths in each cell are Binomial in that cell's exposures.
@model function mortality(data,deaths)
    p ~ Beta(1,1)
    n_exposed = data.exposures
    for row in 1:nrow(data)
        deaths[row] ~ Binomial(n_exposed[row], p)
    end
end
mortality (generic function with 2 methods)
# Instantiate the model on the aggregated data, passing the death counts as the `deaths` argument.
m1 = mortality(data,data.deaths)
DynamicPPL.Model{typeof(mortality), (:data, :deaths), (), (), Tuple{DataFrame, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}(Main.var"workspace#3".mortality, (data = 180×5 DataFrame Row │ issue_age att_age exposures deaths fraction │ Int64 Int64 Int64 Int64 Float64 ─────┼────────────────────────────────────────────────── 1 │ 50 50 1691 37 0.0218805 2 │ 50 51 1592 22 0.0138191 3 │ 50 52 1524 31 0.0203412 4 │ 50 53 1419 28 0.0197322 5 │ 50 54 1329 31 0.0233258 ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ 177 │ 55 81 25 1 0.04 178 │ 55 82 16 2 0.125 179 │ 55 83 8 0 0.0 180 │ 55 84 2 0 0.0 171 rows omitted, deaths = [37, 22, 31, 28, 31, 18, 14, 19, 25, 31 … 7, 3, 6, 6, 5, 1, 1, 2, 0, 0]), NamedTuple(), DynamicPPL.DefaultContext())
# Presumably the number of MCMC chains to run — confirm that it is passed to `sample`.
num_chains = 4
4
We use a No-U-Turn-Sampler (NUTS) technique to sample multiple chains at once:
# Draw 1000 NUTS samples per chain, running `num_chains` chains on separate threads.
chain = sample(m1, NUTS(), MCMCThreads(), 1000, num_chains)
iteration | chain | p | lp | n_steps | is_accept | acceptance_rate | log_density | ... | |
---|---|---|---|---|---|---|---|---|---|
1 | 501 | 1 | 0.0296173 | -565.871 | 1.0 | 1.0 | 1.0 | -565.871 | |
2 | 502 | 1 | 0.028972 | -566.265 | 3.0 | 1.0 | 0.603777 | -566.265 | |
3 | 503 | 1 | 0.028972 | -566.265 | 1.0 | 1.0 | 0.733405 | -566.265 | |
4 | 504 | 1 | 0.0295351 | -565.833 | 3.0 | 1.0 | 0.595807 | -565.833 | |
5 | 505 | 1 | 0.0291248 | -566.027 | 3.0 | 1.0 | 0.724427 | -566.027 | |
6 | 506 | 1 | 0.0294442 | -565.82 | 3.0 | 1.0 | 0.644897 | -565.82 | |
7 | 507 | 1 | 0.0293161 | -565.856 | 3.0 | 1.0 | 0.989477 | -565.856 | |
8 | 508 | 1 | 0.0293161 | -565.856 | 1.0 | 1.0 | 0.94016 | -565.856 | |
9 | 509 | 1 | 0.029315 | -565.856 | 3.0 | 1.0 | 0.699563 | -565.856 | |
10 | 510 | 1 | 0.0303726 | -567.398 | 3.0 | 1.0 | 0.669545 | -567.398 | |
... |
# Plot the sampled values stored in `chain`.
plot(chain)
We can see that the sampling of possible posterior parameters doesn't really fit the data very well since our model was so simplified. The lines represent the posterior binomial probability.
This is saying that for the observed data, if there really is just a single probability `p` that governs the true process that came up with the data, there's a pretty narrow range of values it could possibly be:
# Scatter the observed death fractions by attained age and overlay posterior draws
# of the single probability `p` as horizontal lines.
let
    # marker size grows with the square root of each cell's relative exposure
    data_weight = data.exposures ./ sum(data.exposures)
    data_weight = .√(data_weight ./ maximum(data_weight) .* 20)

    p = scatter(
        data.att_age,
        data.fraction,
        markersize = data_weight,
        alpha = 0.5,
        label = "Experience data point (size indicates relative exposure quantity)",
        xlabel="age",
        ylim=(0.0,0.25),
        ylabel="mortality rate",
        title="Parametric Bayseian Mortality"
    )

    # show n samples from the posterior plotted on the graph
    n = 300
    for _ in 1:n
        p_posterior = sample(chain,1)[:p][1]
        hline!([p_posterior],label="",alpha=0.1)
    end
    p
end
The posterior mean of p
is of course very close to the simple proportion of claims to exposures:
# Mean of the sampled values of `p`.
mean(chain,:p)
0.029440645004487405
# Crude rate: total deaths divided by total exposures.
sum(data.deaths) / sum(data.exposures)
0.029444459756175986
In this example, we utilize a MakehamBeard parameterization because it's already very similar in form to a logistic function. This is important because our desired output is a probability (i.e. the probability of a death at a given age), so the value must be constrained to be in the interval between zero and one.
The prior values for `a`, `b`, `c`, and `k` are chosen to constrain the hazard (mortality) rate to be between zero and one.
This isn't an ideal parameterization (e.g. we aren't including information about the select underwriting period), but is an example of utilizing Bayesian techniques on life experience data.
# One MakehamBeard curve shared by all cells: `a`, `b` and `k` are sampled, `c` is
# fixed at zero, and deaths per cell are Binomial with the curve's hazard at the
# cell's attained age as the death probability.
@model function mortality2(data,deaths)
    a ~ Exponential(0.1)
    b ~ Exponential(0.1)
    k ~ truncated(Exponential(1),1,Inf)
    c = 0.

    # build the parametric mortality model from the current parameter values
    law = MortalityTables.MakehamBeard(;a,b,c,k)

    # let Turing observe each row of the dataframe against the model's hazard
    for row in 1:nrow(data)
        q = MortalityTables.hazard(law, data.att_age[row])
        deaths[row] ~ Binomial(data.exposures[row], q)
    end
end
mortality2 (generic function with 2 methods)
We combine the model with the data:
# Instantiate the MakehamBeard model on the aggregated data and its death counts.
m2 = mortality2(data,data.deaths)
DynamicPPL.Model{typeof(mortality2), (:data, :deaths), (), (), Tuple{DataFrame, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}(Main.var"workspace#3".mortality2, (data = 180×5 DataFrame Row │ issue_age att_age exposures deaths fraction │ Int64 Int64 Int64 Int64 Float64 ─────┼────────────────────────────────────────────────── 1 │ 50 50 1691 37 0.0218805 2 │ 50 51 1592 22 0.0138191 3 │ 50 52 1524 31 0.0203412 4 │ 50 53 1419 28 0.0197322 5 │ 50 54 1329 31 0.0233258 ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ 177 │ 55 81 25 1 0.04 178 │ 55 82 16 2 0.125 179 │ 55 83 8 0 0.0 180 │ 55 84 2 0 0.0 171 rows omitted, deaths = [37, 22, 31, 28, 31, 18, 14, 19, 25, 31 … 7, 3, 6, 6, 5, 1, 1, 2, 0, 0]), NamedTuple(), DynamicPPL.DefaultContext())
We use a No-U-Turn-Sampler (NUTS) technique to sample:
# Draw 1000 samples from `m2` with the NUTS sampler.
chain2 = sample(m2, NUTS(), 1000)
iteration | chain | a | b | k | lp | n_steps | is_accept | ... | |
---|---|---|---|---|---|---|---|---|---|
1 | 501 | 1 | 0.00538775 | 0.0285763 | 1.22514 | -508.349 | 47.0 | 1.0 | |
2 | 502 | 1 | 0.00605425 | 0.0276171 | 2.11015 | -508.484 | 31.0 | 1.0 | |
3 | 503 | 1 | 0.00424921 | 0.0327401 | 1.85763 | -507.655 | 31.0 | 1.0 | |
4 | 504 | 1 | 0.00521705 | 0.0294948 | 1.83623 | -507.443 | 31.0 | 1.0 | |
5 | 505 | 1 | 0.00429435 | 0.0322212 | 1.11689 | -508.758 | 63.0 | 1.0 | |
6 | 506 | 1 | 0.00543553 | 0.0289158 | 1.14623 | -508.747 | 63.0 | 1.0 | |
7 | 507 | 1 | 0.00550289 | 0.0293801 | 3.30914 | -508.596 | 63.0 | 1.0 | |
8 | 508 | 1 | 0.00692691 | 0.0254078 | 2.18266 | -509.896 | 63.0 | 1.0 | |
9 | 509 | 1 | 0.00393535 | 0.0336556 | 1.26862 | -508.705 | 31.0 | 1.0 | |
10 | 510 | 1 | 0.00415108 | 0.0339217 | 1.71502 | -509.248 | 31.0 | 1.0 | |
... |
# Per-parameter summary statistics for the sampled chain.
summarize(chain2)
parameters | mean | std | naive_se | mcse | ess | rhat | ess_per_sec | |
---|---|---|---|---|---|---|---|---|
1 | :a | 0.00485458 | 0.000909626 | 2.87649e-5 | 4.57773e-5 | 319.526 | 1.00142 | 2.69408 |
2 | :b | 0.0311342 | 0.00304173 | 9.61878e-5 | 0.000141053 | 347.652 | 1.00007 | 2.93123 |
3 | :k | 1.85048 | 0.841972 | 0.0266255 | 0.0357123 | 467.084 | 0.999086 | 3.93821 |
# Plot the sampled values stored in `chain2`.
plot(chain2)
We can see that the sampling of possible posterior parameters fits the data well:
# Scatter the observed death fractions by attained age and overlay MakehamBeard
# hazard curves built from individual posterior draws of `chain2`.
let
    # marker size grows with the square root of each cell's relative exposure
    exposure_share = data.exposures ./ sum(data.exposures)
    marker_sizes = .√(exposure_share ./ maximum(exposure_share) .* 20)

    fig = scatter(
        data.att_age,
        data.fraction;
        markersize = marker_sizes,
        alpha = 0.5,
        label = "Experience data point (size indicates relative exposure quantity)",
        xlabel = "age",
        ylim = (0.0,0.25),
        ylabel = "mortality rate",
        title = "Parametric Bayseian Mortality",
    )

    # overlay one hazard curve per posterior draw
    n_draws = 300
    ages = sort!(unique(data.att_age))
    for _ in 1:n_draws
        draw = sample(chain2,1)
        a = only(draw[:a])
        b = only(draw[:b])
        k = only(draw[:k])
        c = 0
        curve = MortalityTables.MakehamBeard(;a,b,c,k)
        plot!(ages, age -> MortalityTables.hazard(curve,age), alpha = 0.1, label = "")
    end
    fig
end
This model extends the prior to create a multi-level model. Each risk class (`risklevel`) gets its own $a$ parameter in the `MakehamBeard` model. The prior for $a_i$ is determined by the hyperparameter $\bar{a}$.
# Multi-level MakehamBeard model: each risk level gets its own `a[i]`, drawn from an
# Exponential prior whose scale `ā` is itself sampled; `b` and `k` are shared and
# `c` is fixed at zero. Deaths per cell are Binomial with the hazard as probability.
@model function mortality3(data,deaths)
    n_risk = length(levels(data.risklevel))
    b ~ Exponential(0.1)
    ā ~ Exponential(0.1)
    a ~ filldist(Exponential(ā), n_risk)
    k ~ truncated(Exponential(1),1,Inf)
    c = 0

    # let Turing observe each row against the curve for that row's risk level
    for row in 1:nrow(data)
        law = MortalityTables.MakehamBeard(;a=a[data.risklevel[row]],b,c,k)
        q = MortalityTables.hazard(law, data.att_age[row])
        deaths[row] ~ Binomial(data.exposures[row], q)
    end
end
mortality3 (generic function with 2 methods)
# Instantiate the multi-level model on the risk-level-split data and its death counts.
m3 = mortality3(data2,data2.deaths)
DynamicPPL.Model{typeof(mortality3), (:data, :deaths), (), (), Tuple{DataFrame, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}(Main.var"workspace#3".mortality3, (data = 531×6 DataFrame Row │ issue_age att_age risklevel exposures deaths fraction │ Int64 Int64 Int64 Int64 Int64 Float64 ─────┼────────────────────────────────────────────────────────────── 1 │ 50 50 1 575 5 0.00869565 2 │ 50 50 2 560 11 0.0196429 3 │ 50 50 3 556 21 0.0377698 4 │ 50 51 1 548 7 0.0127737 5 │ 50 51 2 533 7 0.0131332 ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ 528 │ 55 83 1 5 0 0.0 529 │ 55 83 2 2 0 0.0 530 │ 55 83 3 1 0 0.0 531 │ 55 84 1 2 0 0.0 522 rows omitted, deaths = [5, 11, 21, 7, 7, 8, 7, 9, 15, 2 … 0, 1, 0, 1, 1, 0, 0, 0, 0, 0]), NamedTuple(), DynamicPPL.DefaultContext())
# Draw 1000 samples from `m3` with the NUTS sampler.
chain3 = sample(m3, NUTS(), 1000)
iteration | chain | b | ā | a[1] | a[2] | a[3] | k | ... | |
---|---|---|---|---|---|---|---|---|---|
1 | 501 | 1 | 0.0397605 | 0.00747611 | 0.00204091 | 0.00282777 | 0.00456048 | 3.14971 | |
2 | 502 | 1 | 0.0393775 | 0.00718988 | 0.0019361 | 0.00299858 | 0.00459479 | 2.67492 | |
3 | 503 | 1 | 0.0382458 | 0.00455261 | 0.00205516 | 0.0030325 | 0.00465574 | 2.23378 | |
4 | 504 | 1 | 0.0368445 | 0.00352418 | 0.00225198 | 0.00326397 | 0.00502476 | 1.52521 | |
5 | 505 | 1 | 0.0341907 | 0.0147432 | 0.00260135 | 0.00393644 | 0.00590104 | 1.35102 | |
6 | 506 | 1 | 0.0329568 | 0.017055 | 0.00279913 | 0.00416678 | 0.00639691 | 1.32034 | |
7 | 507 | 1 | 0.0379495 | 0.00582986 | 0.00218864 | 0.00338655 | 0.00514394 | 3.43358 | |
8 | 508 | 1 | 0.0337056 | 0.0123958 | 0.00265158 | 0.00399195 | 0.00588518 | 1.41406 | |
9 | 509 | 1 | 0.0371748 | 0.00343053 | 0.00211543 | 0.00312111 | 0.00484137 | 1.00745 | |
10 | 510 | 1 | 0.0345227 | 0.00464117 | 0.00253952 | 0.00372857 | 0.00578524 | 2.17735 | |
... |
# Per-parameter summary statistics for the sampled chain.
summarize(chain3)
parameters | mean | std | naive_se | mcse | ess | rhat | ess_per_sec | |
---|---|---|---|---|---|---|---|---|
1 | :b | 0.0376547 | 0.00372902 | 0.000117922 | 0.00019069 | 246.104 | 1.00899 | 0.749215 |
2 | :ā | 0.0087331 | 0.00974532 | 0.000308174 | 0.000501709 | 426.733 | 0.999014 | 1.29911 |
3 | Symbol("a[1]") | 0.00223165 | 0.000476801 | 1.50778e-5 | 2.48849e-5 | 260.92 | 1.01057 | 0.794322 |
4 | Symbol("a[2]") | 0.0032482 | 0.000670935 | 2.12168e-5 | 3.58145e-5 | 259.539 | 1.0101 | 0.790116 |
5 | Symbol("a[3]") | 0.00505183 | 0.00098926 | 3.12832e-5 | 5.27162e-5 | 263.255 | 1.01308 | 0.801429 |
6 | :k | 2.39737 | 1.32323 | 0.0418442 | 0.0563751 | 380.416 | 0.999039 | 1.1581 |
# Compact per-parameter summary of the draws, computed from the chain converted to a DataFrame.
PRECIS(DataFrame(chain3))
┌───────┬───────────────────────────────────────────────────────┐ │ param │ mean std 5.5% 50% 94.5% histogram │ ├───────┼───────────────────────────────────────────────────────┤ │ a[1] │ 0.0022 0.0005 0.0015 0.0022 0.003 ▁▆█▄▂▁▁ │ │ a[2] │ 0.0032 0.0007 0.0023 0.0032 0.0044 ▁▃██▆▃▁▁▁ │ │ a[3] │ 0.0051 0.001 0.0036 0.005 0.0068 ▁▂▄▇▇█▆▃▂▁▁▁▁ │ │ b │ 0.0377 0.0037 0.0321 0.0373 0.0436 ▁▁▂▄▇█▇▅▃▁▁▁ │ │ k │ 2.3974 1.3232 1.0745 1.9662 5.0622 █▄▂▂▁▁▁▁ │ │ ā │ 0.0087 0.0097 0.0022 0.006 0.0219 █▁▁▁▁▁▁▁ │ └───────┴───────────────────────────────────────────────────────┘
# Scatter the observed death fractions coloured by risk level and overlay, per risk
# level, MakehamBeard hazard curves built from individual posterior draws of `chain3`.
let data = data2
    # marker size grows with the square root of each cell's relative exposure
    exposure_share = data.exposures ./ sum(data.exposures)
    marker_sizes = .√(exposure_share ./ maximum(exposure_share) .* 20)

    fig = scatter(
        data.att_age,
        data.fraction;
        markersize = marker_sizes,
        alpha = 0.5,
        color = data.risklevel,
        label = "Experience data point (size indicates relative exposure quantity)",
        xlabel = "age",
        ylim = (0.0,0.25),
        ylabel = "mortality rate",
        title = "Parametric Bayseian Mortality",
    )

    # overlay n posterior curves for each risk level
    n = 100
    ages = sort!(unique(data.att_age))
    for r in 1:3
        for i in 1:n
            draw = sample(chain3,1)
            a = only(draw[Symbol("a[$r]")])
            b = only(draw[:b])
            k = only(draw[:k])
            c = 0
            curve = MortalityTables.MakehamBeard(;a,b,c,k)
            # only the first curve of each risk level carries a legend entry
            curve_label = i == 1 ? "risk level $r" : ""
            plot!(ages, age -> MortalityTables.hazard(curve,age), label = curve_label, alpha = 0.2, color = r)
        end
    end
    fig
end
The key is to use the Poisson distribution, whose rate is the expected number of deaths (exposures times the hazard):
# Multi-level MakehamBeard model with a Poisson likelihood: each risk level gets its
# own `a[i]` (Exponential prior with sampled scale `ā`); `b`, `c` and `k` are shared.
# Deaths per cell are Poisson with mean exposures × hazard.
@model function mortality4(data,deaths)
    n_risk = length(levels(data.risklevel))
    b ~ Exponential(0.1)
    ā ~ Exponential(0.1)
    a ~ filldist(Exponential(ā), n_risk)
    c ~ Beta(4,18)
    k ~ truncated(Exponential(1),1,Inf)

    # let Turing observe each row against the curve for that row's risk level
    for row in 1:nrow(data)
        law = MortalityTables.MakehamBeard(;a=a[data.risklevel[row]],b,c,k)
        q = MortalityTables.hazard(law, data.att_age[row])
        deaths[row] ~ Poisson(data.exposures[row] * q)
    end
end
mortality4 (generic function with 2 methods)
# Instantiate the Poisson model on the risk-level-split data and its death counts.
m4 = mortality4(data2,data2.deaths)
DynamicPPL.Model{typeof(mortality4), (:data, :deaths), (), (), Tuple{DataFrame, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}(Main.var"workspace#3".mortality4, (data = 531×6 DataFrame Row │ issue_age att_age risklevel exposures deaths fraction │ Int64 Int64 Int64 Int64 Int64 Float64 ─────┼────────────────────────────────────────────────────────────── 1 │ 50 50 1 575 5 0.00869565 2 │ 50 50 2 560 11 0.0196429 3 │ 50 50 3 556 21 0.0377698 4 │ 50 51 1 548 7 0.0127737 5 │ 50 51 2 533 7 0.0131332 ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ 528 │ 55 83 1 5 0 0.0 529 │ 55 83 2 2 0 0.0 530 │ 55 83 3 1 0 0.0 531 │ 55 84 1 2 0 0.0 522 rows omitted, deaths = [5, 11, 21, 7, 7, 8, 7, 9, 15, 2 … 0, 1, 0, 1, 1, 0, 0, 0, 0, 0]), NamedTuple(), DynamicPPL.DefaultContext())
# Draw 1000 samples from `m4` with the NUTS sampler.
chain4 = sample(m4, NUTS(), 1000)
iteration | chain | b | ā | a[1] | a[2] | a[3] | c | ... | |
---|---|---|---|---|---|---|---|---|---|
1 | 501 | 1 | 0.0439438 | 0.00307492 | 0.000870426 | 0.00149385 | 0.00255717 | 0.00638515 | |
2 | 502 | 1 | 0.0438041 | 0.00591787 | 0.00108074 | 0.00160408 | 0.00290747 | 0.00694355 | |
3 | 503 | 1 | 0.0417991 | 0.00359182 | 0.00126037 | 0.0021495 | 0.00330973 | 0.00474184 | |
4 | 504 | 1 | 0.0416494 | 0.00268639 | 0.00130428 | 0.00197703 | 0.00325999 | 0.00528317 | |
5 | 505 | 1 | 0.0508616 | 0.00126101 | 0.000628299 | 0.000956821 | 0.00164957 | 0.00809153 | |
6 | 506 | 1 | 0.0524745 | 0.00151628 | 0.000407118 | 0.000850948 | 0.0014247 | 0.0098292 | |
7 | 507 | 1 | 0.0440666 | 0.00198958 | 0.00104292 | 0.00186851 | 0.00317848 | 0.00531812 | |
8 | 508 | 1 | 0.0493017 | 0.00451559 | 0.000665149 | 0.00118859 | 0.00186156 | 0.00715901 | |
9 | 509 | 1 | 0.0483145 | 0.00784937 | 0.000785016 | 0.00139438 | 0.00221619 | 0.00743335 | |
10 | 510 | 1 | 0.0488616 | 0.00382112 | 0.000843573 | 0.00124841 | 0.0024025 | 0.0048838 | |
... |
# Compact per-parameter summary of the draws, computed from the chain converted to a DataFrame.
PRECIS(DataFrame(chain4))
┌───────┬────────────────────────────────────────────────────────┐ │ param │ mean std 5.5% 50% 94.5% histogram │ ├───────┼────────────────────────────────────────────────────────┤ │ a[1] │ 0.001 0.0004 0.0004 0.001 0.0017 ▁▂▅█▇▇▆▄▂▁▁▁▁▁ │ │ a[2] │ 0.0016 0.0006 0.0007 0.0015 0.0026 ▁▅██▅▂▁▁▁ │ │ a[3] │ 0.0027 0.0009 0.0013 0.0026 0.0042 ▁▄▇██▇▅▂▁▁▁▁▁ │ │ b │ 0.0461 0.0058 0.0383 0.0453 0.056 ▁▃█▇▄▂▁▁▁ │ │ c │ 0.0061 0.0021 0.0026 0.0059 0.0096 ▁▃▄▆█▇▆▄▃▁▁▁▁ │ │ k │ 2.5842 1.4751 1.0752 2.1688 5.2745 █▅▃▂▁▁▁▁▁▁▁▁ │ │ ā │ 0.0052 0.0071 0.0008 0.0029 0.017 █▁▁▁▁▁▁▁ │ └───────┴────────────────────────────────────────────────────────┘
# Mean sampled value of `a[1]`, `a[2]` and `a[3]`, one per risk level.
risk_factors4 = map(f -> mean(chain4[Symbol("a[$f]")]), 1:3)
3-element Vector{Float64}: 0.0009925896942950376 0.001597560230398143 0.0026938300706679545
# Express each risk level's mean `a` relative to risk level 2.
risk_factors4 ./ risk_factors4[2]
3-element Vector{Float64}: 0.6213159763295215 1.0 1.6862150292741074
# Scatter the observed death fractions coloured by risk level and overlay, per risk
# level, MakehamBeard hazard curves built from individual posterior draws of `chain4`.
let data = data2
    # marker size grows with the square root of each cell's relative exposure
    data_weight = data.exposures ./ sum(data.exposures)
    data_weight = .√(data_weight ./ maximum(data_weight) .* 20)

    color_i = data.risklevel

    p = scatter(
        data.att_age,
        data.fraction,
        markersize = data_weight,
        alpha = 0.5,
        color=color_i,
        label = "Experience data point (size indicates relative exposure quantity)",
        xlabel="age",
        ylim=(0.0,0.25),
        ylabel="mortality rate",
        title="Parametric Bayseian Mortality"
    )

    # show n samples from the posterior plotted on the graph
    n = 100
    ages = sort!(unique(data.att_age))

    for r in 1:3
        for i in 1:n
            s = sample(chain4,1)
            a = only(s[Symbol("a[$r]")])
            b = only(s[:b])
            k = only(s[:k])
            # model 4 samples `c`, so the curve must use the drawn value rather than zero
            c = only(s[:c])
            m = MortalityTables.MakehamBeard(;a,b,c,k)
            # only the first curve of each risk level carries a legend entry
            curve_label = i == 1 ? "risk level $r" : ""
            plot!(ages,age -> MortalityTables.hazard(m,age),label=curve_label, alpha = 0.2,color=r)
        end
    end
    p
end