Run this Pluto Notebook

begin
    using MortalityTables
    using Turing
    using UUIDs
    using DataFramesMeta
    using MCMCChains, Plots, StatsPlots
    using LinearAlgebra
    using PlutoUI; TableOfContents()
    using Pipe
    using StatisticalRethinking
    using StatsFuns
end

Generating fake data

The problem of interest is to look at mortality rates, which are given in terms of exposures (whether or not a life experienced a death in a given year).

We'll grab some example rates from an insurance table, which has a "selection" component: when someone enters observation, say at age 50, their mortality is path dependent (so someone who started being observed at 50 will have a different risk/mortality rate at age 55 than someone who started being observed at 45).

Additionally, there may be other groups of interest, such as:

high/medium/low risk classification
sex
group (e.g. company, data source, etc.)
type of insurance product offered

The example data will start with only the risk classification above

src = MortalityTables.table("2001 VBT Residual Standard Select and Ultimate - Male Nonsmoker, ANB")

MortalityTable (Insured Lives Mortality):
   Name:
       2001 VBT Residual Standard Select and Ultimate - Male Nonsmoker, ANB
   Fields: 
       (:select, :ultimate, :metadata)
   Provider:
       Society of Actuaries
   mort.SOA.org ID:
       1118
   mort.SOA.org link:
       https://mort.soa.org/ViewTable.aspx?&TableIdentity=1118
   Description:
       2001 Valuation Basic Table (VBT) Residual Standard Select and Ultimate Table -  Male Nonsmoker. Basis: Age Nearest Birthday. Minimum Select Age: 0. Maximum Select Age: 99. Minimum Ultimate Age: 25. Maximum Ultimate Age: 120

src.select[50]

71-element OffsetArray(::Vector{Float64}, 50:120) with eltype Float64 with indices 50:120:
 0.00104
 0.00139
 0.00177
 0.00218
 0.00261
 0.00315
 0.00384
 ⋮
 0.75603
 0.79988
 0.84627
 0.89536
 0.94729
 1.0

n = 10_000

function generate_data_individual(tbl,issue_age=rand(50:55),inforce_years=rand(1:30),risklevel=rand(1:3))
    # risk_factors will scale the "true" parameter up or down
    # we observe the assigned risklevel, but not risk_factor
    risk_factors = [0.7,1.0,1.5]
    rf = risk_factors[risklevel]
    deaths = rand(inforce_years) .< (tbl.select[issue_age][issue_age .+ inforce_years .- 1 ] .* rf)
    
    endpoint = if sum(deaths) == 0
        last(inforce_years)
    else
        findfirst(deaths)
    end
    id= uuid1()
    map(1:endpoint) do i
        (
        issue_age=issue_age,
        risklevel = risklevel,
        att_age = issue_age + i -1,
        death = deaths[i],
        id = id,
    )
    end
    
end

generate_data_individual (generic function with 4 methods)

exposures = vcat([generate_data_individual(src) for _ in 1:n]...) |> DataFrame

	issue_age	risklevel	att_age	death	id
1	53	3	53	false	UUID("972cab6c-93c9-11ed-0ddb-db5fd1b2311d")
2	53	3	54	false	UUID("972cab6c-93c9-11ed-0ddb-db5fd1b2311d")
3	53	3	55	false	UUID("972cab6c-93c9-11ed-0ddb-db5fd1b2311d")
4	55	2	55	false	UUID("9750238a-93c9-11ed-1fd3-eb3978daaa6b")
5	55	2	56	false	UUID("9750238a-93c9-11ed-1fd3-eb3978daaa6b")
6	55	2	57	false	UUID("9750238a-93c9-11ed-1fd3-eb3978daaa6b")
7	55	2	58	false	UUID("9750238a-93c9-11ed-1fd3-eb3978daaa6b")
8	55	2	59	false	UUID("9750238a-93c9-11ed-1fd3-eb3978daaa6b")
9	55	2	60	false	UUID("9750238a-93c9-11ed-1fd3-eb3978daaa6b")
10	55	2	61	false	UUID("9750238a-93c9-11ed-1fd3-eb3978daaa6b")
...
108849	52	3	53	true	UUID("975c8ed6-93c9-11ed-1589-0dc00462a941")

data = combine(groupby(exposures,[:issue_age,:att_age])) do subdf
    (exposures = nrow(subdf),
    deaths = sum(subdf.death),
    fraction = sum(subdf.death)/ nrow(subdf))
end

	issue_age	att_age	exposures	deaths	fraction
1	50	50	1691	37	0.0218805
2	50	51	1592	22	0.0138191
3	50	52	1524	31	0.0203412
4	50	53	1419	28	0.0197322
5	50	54	1329	31	0.0233258
6	50	55	1244	18	0.0144695
7	50	56	1174	14	0.011925
8	50	57	1104	19	0.0172101
9	50	58	1032	25	0.0242248
10	50	59	967	31	0.0320579
...
180	55	84	2	0	0.0

data2 = combine(groupby(exposures,[:issue_age,:att_age,:risklevel])) do subdf
    (exposures = nrow(subdf),
    deaths = sum(subdf.death),
    fraction = sum(subdf.death)/ nrow(subdf))
end

	issue_age	att_age	risklevel	exposures	deaths	fraction
1	50	50	1	575	5	0.00869565
2	50	50	2	560	11	0.0196429
3	50	50	3	556	21	0.0377698
4	50	51	1	548	7	0.0127737
5	50	51	2	533	7	0.0131332
6	50	51	3	511	8	0.0156556
7	50	52	1	522	7	0.01341
8	50	52	2	516	9	0.0174419
9	50	52	3	486	15	0.0308642
10	50	53	1	485	2	0.00412371
...
531	55	84	1	2	0	0.0

1. A single binomial parameter model

Estimate $p$, the average mortality rate, not accounting for any variation within the population/sample:

@model function mortality(data,deaths) 
    p ~ Beta(1,1)
    for i = 1:nrow(data)
        deaths[i] ~ Binomial(data.exposures[i],p)
    end
end

mortality (generic function with 2 methods)

m1 = mortality(data,data.deaths)

DynamicPPL.Model{typeof(mortality), (:data, :deaths), (), (), Tuple{DataFrame, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}(Main.var"workspace#3".mortality, (data = 180×5 DataFrame
 Row │ issue_age  att_age  exposures  deaths  fraction
     │ Int64      Int64    Int64      Int64   Float64
─────┼──────────────────────────────────────────────────
   1 │        50       50       1691      37  0.0218805
   2 │        50       51       1592      22  0.0138191
   3 │        50       52       1524      31  0.0203412
   4 │        50       53       1419      28  0.0197322
   5 │        50       54       1329      31  0.0233258
  ⋮  │     ⋮         ⋮         ⋮        ⋮         ⋮
 177 │        55       81         25       1  0.04
 178 │        55       82         16       2  0.125
 179 │        55       83          8       0  0.0
 180 │        55       84          2       0  0.0
                                        171 rows omitted, deaths = [37, 22, 31, 28, 31, 18, 14, 19, 25, 31  …  7, 3, 6, 6, 5, 1, 1, 2, 0, 0]), NamedTuple(), DynamicPPL.DefaultContext())

num_chains = 4

Sampling from the posterior

We use a No-U-Turn-Sampler (NUTS) technique to sample multiple chains at once:

chain = sample(m1, NUTS(), 1000)

	iteration	chain	p	lp	n_steps	is_accept	acceptance_rate	log_density
1	501	1	0.0296173	-565.871	1.0	1.0	1.0	-565.871
2	502	1	0.028972	-566.265	3.0	1.0	0.603777	-566.265
3	503	1	0.028972	-566.265	1.0	1.0	0.733405	-566.265
4	504	1	0.0295351	-565.833	3.0	1.0	0.595807	-565.833
5	505	1	0.0291248	-566.027	3.0	1.0	0.724427	-566.027
6	506	1	0.0294442	-565.82	3.0	1.0	0.644897	-565.82
7	507	1	0.0293161	-565.856	3.0	1.0	0.989477	-565.856
8	508	1	0.0293161	-565.856	1.0	1.0	0.94016	-565.856
9	509	1	0.029315	-565.856	3.0	1.0	0.699563	-565.856
10	510	1	0.0303726	-567.398	3.0	1.0	0.669545	-567.398
...

plot(chain)

Plotting samples from the posterior

We can see that the sampling of possible posterior parameters doesn't really fit the data very well since our model was so simplified. The lines represent the posterior binomial probability.

This is saying that for the observed data, if there really is just a single probability p that governs the true process that came up with the data, there's a pretty narrow range of values it could possibly be:

let
    data_weight = data.exposures ./ sum(data.exposures)
    data_weight = .√(data_weight ./ maximum(data_weight) .* 20)
    
    p = scatter(
        data.att_age,
        data.fraction, 
        markersize = data_weight, 
        alpha = 0.5, 
        label = "Experience data point (size indicates relative exposure quantity)",
        xlabel="age",
        ylim=(0.0,0.25),
        ylabel="mortality rate", 
        title="Parametric Bayseian Mortality"
    )
    
    # show n samples from the posterior plotted on the graph
    n = 300
    ages = sort!(unique(data.att_age))
    
    for i in 1:n
        p_posterior = sample(chain,1)[:p][1]
        hline!([p_posterior],label="",alpha=0.1)
    end
    p
    
end

The posterior mean of p is of course very close to the simple proportion of claims to exposures:

mean(chain,:p)

0.029440645004487405

sum(data.deaths) / sum(data.exposures)

0.029444459756175986

2. Parametric model

In this example, we utilize a MakehamBeard parameterization because it's already very similar in form to a logistic function. This is important because our desired output is a probability (i.e. the probability of a death at a given age), so the value must be constrained to be in the interval between zero and one.

The prior values for a,b,c, and k are chosen to constrain the hazard (mortality) rate to be between zero and one.

This isn't an ideal parameterization (e.g. we aren't including information about the select underwriting period), but is an example of utilizing Bayesian techniques on life experience data.

@model function mortality2(data,deaths) 
    a ~ Exponential(0.1)
    b ~ Exponential(0.1)
    c = 0.
    k ~ truncated(Exponential(1),1,Inf)
    
    # use the variables to create a parametric mortality model
    m = MortalityTables.MakehamBeard(;a,b,c,k)

    # loop through the rows of the dataframe to let Turing observe the data 
    # and how consistent the parameters are with the data
    for i = 1:nrow(data)
        age = data.att_age[i]	
        q = MortalityTables.hazard(m,age)
        deaths[i] ~ Binomial(data.exposures[i],q)
    end
end

mortality2 (generic function with 2 methods)

We combine the model with the data:

m2 = mortality2(data,data.deaths)

DynamicPPL.Model{typeof(mortality2), (:data, :deaths), (), (), Tuple{DataFrame, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}(Main.var"workspace#3".mortality2, (data = 180×5 DataFrame
 Row │ issue_age  att_age  exposures  deaths  fraction
     │ Int64      Int64    Int64      Int64   Float64
─────┼──────────────────────────────────────────────────
   1 │        50       50       1691      37  0.0218805
   2 │        50       51       1592      22  0.0138191
   3 │        50       52       1524      31  0.0203412
   4 │        50       53       1419      28  0.0197322
   5 │        50       54       1329      31  0.0233258
  ⋮  │     ⋮         ⋮         ⋮        ⋮         ⋮
 177 │        55       81         25       1  0.04
 178 │        55       82         16       2  0.125
 179 │        55       83          8       0  0.0
 180 │        55       84          2       0  0.0
                                        171 rows omitted, deaths = [37, 22, 31, 28, 31, 18, 14, 19, 25, 31  …  7, 3, 6, 6, 5, 1, 1, 2, 0, 0]), NamedTuple(), DynamicPPL.DefaultContext())

Sampling from the posterior

We use a No-U-Turn-Sampler (NUTS) technique to sample:

chain2 = sample(m2, NUTS(), 1000)

	iteration	chain	a	b	k	lp	n_steps	is_accept
1	501	1	0.00538775	0.0285763	1.22514	-508.349	47.0	1.0
2	502	1	0.00605425	0.0276171	2.11015	-508.484	31.0	1.0
3	503	1	0.00424921	0.0327401	1.85763	-507.655	31.0	1.0
4	504	1	0.00521705	0.0294948	1.83623	-507.443	31.0	1.0
5	505	1	0.00429435	0.0322212	1.11689	-508.758	63.0	1.0
6	506	1	0.00543553	0.0289158	1.14623	-508.747	63.0	1.0
7	507	1	0.00550289	0.0293801	3.30914	-508.596	63.0	1.0
8	508	1	0.00692691	0.0254078	2.18266	-509.896	63.0	1.0
9	509	1	0.00393535	0.0336556	1.26862	-508.705	31.0	1.0
10	510	1	0.00415108	0.0339217	1.71502	-509.248	31.0	1.0
...

summarize(chain2)

	parameters	mean	std	naive_se	mcse	ess	rhat	ess_per_sec
1	:a	0.00485458	0.000909626	2.87649e-5	4.57773e-5	319.526	1.00142	2.69408
2	:b	0.0311342	0.00304173	9.61878e-5	0.000141053	347.652	1.00007	2.93123
3	:k	1.85048	0.841972	0.0266255	0.0357123	467.084	0.999086	3.93821

plot(chain2)

Plotting samples from the posterior

We can see that the sampling of possible posterior parameters fits the data well:

let
    data_weight = data.exposures ./ sum(data.exposures)
    data_weight = .√(data_weight ./ maximum(data_weight) .* 20)
    
    p = scatter(
        data.att_age,
        data.fraction, 
        markersize = data_weight, 
        alpha = 0.5, 
        label = "Experience data point (size indicates relative exposure quantity)",
        xlabel="age",
        ylim=(0.0,0.25),
        ylabel="mortality rate", 
        title="Parametric Bayseian Mortality"
    )
    

    # show n samples from the posterior plotted on the graph
    n = 300
    ages = sort!(unique(data.att_age))
    
    for i in 1:n
        s = sample(chain2,1)
        a = only(s[:a])
        b = only(s[:b])
        k = only(s[:k])
        c = 0
        m = MortalityTables.MakehamBeard(;a,b,c,k)
        plot!(ages,age -> MortalityTables.hazard(m,age), alpha = 0.1,label="")
    end
    p
end

3. Multi-level parametric model

This model extends the previous one to create a multi-level model. Each risk class (risklevel) gets its own $a$ parameter in the MakehamBeard model. The prior for $a_i$ is determined by the hyperparameter $\bar{a}$.

@model function mortality3(data,deaths) 
    risk_levels = length(levels(data.risklevel))
    b ~ Exponential(0.1)
    ā ~ Exponential(0.1)
    a ~ filldist(Exponential(ā), risk_levels)
    c = 0
    k ~ truncated(Exponential(1),1,Inf)
    
    # use the variables to create a parametric mortality model

    # loop through the rows of the dataframe to let Turing observe the data 
    # and how consistent the parameters are with the data
    for i = 1:nrow(data)
        risk = data.risklevel[i]
        
        m = MortalityTables.MakehamBeard(;a=a[risk],b,c,k)
        age = data.att_age[i]	
        q = MortalityTables.hazard(m,age)
        deaths[i] ~ Binomial(data.exposures[i],q)
    end
end

mortality3 (generic function with 2 methods)

m3 = mortality3(data2,data2.deaths)

DynamicPPL.Model{typeof(mortality3), (:data, :deaths), (), (), Tuple{DataFrame, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}(Main.var"workspace#3".mortality3, (data = 531×6 DataFrame
 Row │ issue_age  att_age  risklevel  exposures  deaths  fraction
     │ Int64      Int64    Int64      Int64      Int64   Float64
─────┼──────────────────────────────────────────────────────────────
   1 │        50       50          1        575       5  0.00869565
   2 │        50       50          2        560      11  0.0196429
   3 │        50       50          3        556      21  0.0377698
   4 │        50       51          1        548       7  0.0127737
   5 │        50       51          2        533       7  0.0131332
  ⋮  │     ⋮         ⋮         ⋮          ⋮        ⋮         ⋮
 528 │        55       83          1          5       0  0.0
 529 │        55       83          2          2       0  0.0
 530 │        55       83          3          1       0  0.0
 531 │        55       84          1          2       0  0.0
                                                    522 rows omitted, deaths = [5, 11, 21, 7, 7, 8, 7, 9, 15, 2  …  0, 1, 0, 1, 1, 0, 0, 0, 0, 0]), NamedTuple(), DynamicPPL.DefaultContext())

chain3 = sample(m3, NUTS(), 1000)

	iteration	chain	b	ā	a[1]	a[2]	a[3]	k
1	501	1	0.0397605	0.00747611	0.00204091	0.00282777	0.00456048	3.14971
2	502	1	0.0393775	0.00718988	0.0019361	0.00299858	0.00459479	2.67492
3	503	1	0.0382458	0.00455261	0.00205516	0.0030325	0.00465574	2.23378
4	504	1	0.0368445	0.00352418	0.00225198	0.00326397	0.00502476	1.52521
5	505	1	0.0341907	0.0147432	0.00260135	0.00393644	0.00590104	1.35102
6	506	1	0.0329568	0.017055	0.00279913	0.00416678	0.00639691	1.32034
7	507	1	0.0379495	0.00582986	0.00218864	0.00338655	0.00514394	3.43358
8	508	1	0.0337056	0.0123958	0.00265158	0.00399195	0.00588518	1.41406
9	509	1	0.0371748	0.00343053	0.00211543	0.00312111	0.00484137	1.00745
10	510	1	0.0345227	0.00464117	0.00253952	0.00372857	0.00578524	2.17735
...

summarize(chain3)

	parameters	mean	std	naive_se	mcse	ess	rhat	ess_per_sec
1	:b	0.0376547	0.00372902	0.000117922	0.00019069	246.104	1.00899	0.749215
2	:ā	0.0087331	0.00974532	0.000308174	0.000501709	426.733	0.999014	1.29911
3	Symbol("a[1]")	0.00223165	0.000476801	1.50778e-5	2.48849e-5	260.92	1.01057	0.794322
4	Symbol("a[2]")	0.0032482	0.000670935	2.12168e-5	3.58145e-5	259.539	1.0101	0.790116
5	Symbol("a[3]")	0.00505183	0.00098926	3.12832e-5	5.27162e-5	263.255	1.01308	0.801429
6	:k	2.39737	1.32323	0.0418442	0.0563751	380.416	0.999039	1.1581

PRECIS(DataFrame(chain3))

┌───────┬───────────────────────────────────────────────────────┐
│ param │   mean     std    5.5%     50%   94.5%      histogram │
├───────┼───────────────────────────────────────────────────────┤
│  a[1] │ 0.0022  0.0005  0.0015  0.0022   0.003        ▁▆█▄▂▁▁ │
│  a[2] │ 0.0032  0.0007  0.0023  0.0032  0.0044      ▁▃██▆▃▁▁▁ │
│  a[3] │ 0.0051   0.001  0.0036   0.005  0.0068  ▁▂▄▇▇█▆▃▂▁▁▁▁ │
│     b │ 0.0377  0.0037  0.0321  0.0373  0.0436   ▁▁▂▄▇█▇▅▃▁▁▁ │
│     k │ 2.3974  1.3232  1.0745  1.9662  5.0622       █▄▂▂▁▁▁▁ │
│     ā │ 0.0087  0.0097  0.0022   0.006  0.0219       █▁▁▁▁▁▁▁ │
└───────┴───────────────────────────────────────────────────────┘

let data = data2
    
    data_weight = data.exposures ./ sum(data.exposures)
    data_weight = .√(data_weight ./ maximum(data_weight) .* 20)
    color_i = data.risklevel
    
    p = scatter(
        data.att_age,
        data.fraction, 
        markersize = data_weight, 
        alpha = 0.5, 
        color=color_i,
        label = "Experience data point (size indicates relative exposure quantity)",
        xlabel="age",
        ylim=(0.0,0.25),
        ylabel="mortality rate", 
        title="Parametric Bayseian Mortality"
    )
    

    # show n samples from the posterior plotted on the graph
    n = 100
    
    ages = sort!(unique(data.att_age))
    for r in 1:3	
        for i in 1:n
            s = sample(chain3,1)
            a = only(s[Symbol("a[$r]")])
            b = only(s[:b])
            k = only(s[:k])
            c = 0
            m = MortalityTables.MakehamBeard(;a,b,c,k)
            if i == 1 
                plot!(ages,age -> MortalityTables.hazard(m,age),label="risk level $r", alpha = 0.2,color=r)
            else
                plot!(ages,age -> MortalityTables.hazard(m,age),label="", alpha = 0.2,color=r)
            end
        end
    end
    p
end

Handling non-unit exposures

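The key is to use the Poisson distribution: a Binomial likelihood needs a whole number of trials, whereas a Poisson likelihood with rate (exposure × mortality rate) also works when exposures are fractional. This version of the model additionally estimates the Makeham constant $c$: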

@model function mortality4(data,deaths) 
    risk_levels = length(levels(data.risklevel))
    b ~ Exponential(0.1)
    ā ~ Exponential(0.1)
    a ~ filldist(Exponential(ā), risk_levels)
    c ~ Beta(4,18)
    k ~ truncated(Exponential(1),1,Inf)
    
    # use the variables to create a parametric mortality model

    # loop through the rows of the dataframe to let Turing observe the data 
    # and how consistent the parameters are with the data
    for i = 1:nrow(data)
        risk = data.risklevel[i]
        
        m = MortalityTables.MakehamBeard(;a=a[risk],b,c,k)
        age = data.att_age[i]	
        q = MortalityTables.hazard(m,age)
        deaths[i] ~ Poisson(data.exposures[i] * q)
    end
end

mortality4 (generic function with 2 methods)

m4 = mortality4(data2,data2.deaths)

DynamicPPL.Model{typeof(mortality4), (:data, :deaths), (), (), Tuple{DataFrame, Vector{Int64}}, Tuple{}, DynamicPPL.DefaultContext}(Main.var"workspace#3".mortality4, (data = 531×6 DataFrame
 Row │ issue_age  att_age  risklevel  exposures  deaths  fraction
     │ Int64      Int64    Int64      Int64      Int64   Float64
─────┼──────────────────────────────────────────────────────────────
   1 │        50       50          1        575       5  0.00869565
   2 │        50       50          2        560      11  0.0196429
   3 │        50       50          3        556      21  0.0377698
   4 │        50       51          1        548       7  0.0127737
   5 │        50       51          2        533       7  0.0131332
  ⋮  │     ⋮         ⋮         ⋮          ⋮        ⋮         ⋮
 528 │        55       83          1          5       0  0.0
 529 │        55       83          2          2       0  0.0
 530 │        55       83          3          1       0  0.0
 531 │        55       84          1          2       0  0.0
                                                    522 rows omitted, deaths = [5, 11, 21, 7, 7, 8, 7, 9, 15, 2  …  0, 1, 0, 1, 1, 0, 0, 0, 0, 0]), NamedTuple(), DynamicPPL.DefaultContext())

chain4 = sample(m4, NUTS(), 1000)

	iteration	chain	b	ā	a[1]	a[2]	a[3]	c
1	501	1	0.0439438	0.00307492	0.000870426	0.00149385	0.00255717	0.00638515
2	502	1	0.0438041	0.00591787	0.00108074	0.00160408	0.00290747	0.00694355
3	503	1	0.0417991	0.00359182	0.00126037	0.0021495	0.00330973	0.00474184
4	504	1	0.0416494	0.00268639	0.00130428	0.00197703	0.00325999	0.00528317
5	505	1	0.0508616	0.00126101	0.000628299	0.000956821	0.00164957	0.00809153
6	506	1	0.0524745	0.00151628	0.000407118	0.000850948	0.0014247	0.0098292
7	507	1	0.0440666	0.00198958	0.00104292	0.00186851	0.00317848	0.00531812
8	508	1	0.0493017	0.00451559	0.000665149	0.00118859	0.00186156	0.00715901
9	509	1	0.0483145	0.00784937	0.000785016	0.00139438	0.00221619	0.00743335
10	510	1	0.0488616	0.00382112	0.000843573	0.00124841	0.0024025	0.0048838
...

PRECIS(DataFrame(chain4))

┌───────┬────────────────────────────────────────────────────────┐
│ param │   mean     std    5.5%     50%   94.5%       histogram │
├───────┼────────────────────────────────────────────────────────┤
│  a[1] │  0.001  0.0004  0.0004   0.001  0.0017  ▁▂▅█▇▇▆▄▂▁▁▁▁▁ │
│  a[2] │ 0.0016  0.0006  0.0007  0.0015  0.0026       ▁▅██▅▂▁▁▁ │
│  a[3] │ 0.0027  0.0009  0.0013  0.0026  0.0042   ▁▄▇██▇▅▂▁▁▁▁▁ │
│     b │ 0.0461  0.0058  0.0383  0.0453   0.056       ▁▃█▇▄▂▁▁▁ │
│     c │ 0.0061  0.0021  0.0026  0.0059  0.0096   ▁▃▄▆█▇▆▄▃▁▁▁▁ │
│     k │ 2.5842  1.4751  1.0752  2.1688  5.2745    █▅▃▂▁▁▁▁▁▁▁▁ │
│     ā │ 0.0052  0.0071  0.0008  0.0029   0.017        █▁▁▁▁▁▁▁ │
└───────┴────────────────────────────────────────────────────────┘

risk_factors4 = [mean(chain4[Symbol("a[$f]")]) for f in 1:3]

3-element Vector{Float64}:
 0.0009925896942950376
 0.001597560230398143
 0.0026938300706679545

risk_factors4 ./ risk_factors4[2]

3-element Vector{Float64}:
 0.6213159763295215
 1.0
 1.6862150292741074

let data = data2
    
    data_weight = data.exposures ./ sum(data.exposures)
    data_weight = .√(data_weight ./ maximum(data_weight) .* 20)
    color_i = data.risklevel
    
    p = scatter(
        data.att_age,
        data.fraction, 
        markersize = data_weight, 
        alpha = 0.5, 
        color=color_i,
        label = "Experience data point (size indicates relative exposure quantity)",
        xlabel="age",
        ylim=(0.0,0.25),
        ylabel="mortality rate", 
        title="Parametric Bayseian Mortality"
    )
    

    # show n samples from the posterior plotted on the graph
    n = 100
    
    ages = sort!(unique(data.att_age))
    for r in 1:3	
        for i in 1:n
            s = sample(chain4,1)
            a = only(s[Symbol("a[$r]")])
            b = only(s[:b])
            k = only(s[:k])
            c = 0
            m = MortalityTables.MakehamBeard(;a,b,c,k)
            if i == 1 
                plot!(ages,age -> MortalityTables.hazard(m,age),label="risk level $r", alpha = 0.2,color=r)
            else
                plot!(ages,age -> MortalityTables.hazard(m,age),label="", alpha = 0.2,color=r)
            end
        end
    end
    p
end

Predictions

We can generate predictive estimates by passing a vector of `missing` values in place of the outcome variables and then calling `predict`.

We get a table of values where each row is the prediction implied by the corresponding chain sample, and the columns are the predicted values for each of the outcomes in our original dataset.

preds = predict(mortality4(data2,fill(missing,length(data2.deaths))),chain4)

	iteration	chain	deaths[1]	deaths[2]	deaths[3]	deaths[4]	deaths[5]	deaths[6]
1	1	1	8.0	10.0	13.0	4.0	9.0	8.0
2	2	1	6.0	7.0	20.0	12.0	11.0	15.0
3	3	1	6.0	13.0	21.0	11.0	12.0	18.0
4	4	1	10.0	12.0	18.0	13.0	13.0	12.0
5	5	1	6.0	6.0	17.0	6.0	13.0	16.0
6	6	1	7.0	9.0	10.0	10.0	15.0	13.0
7	7	1	12.0	11.0	16.0	6.0	16.0	16.0
8	8	1	7.0	13.0	14.0	13.0	7.0	15.0
9	9	1	10.0	14.0	17.0	6.0	13.0	14.0
10	10	1	0.0	12.0	16.0	6.0	11.0	16.0
...

size(preds)

(1000, 531, 1)

Built with Julia 1.8.5 and

DataFramesMeta 0.12.0
MCMCChains 5.6.1
MortalityTables 2.3.0
Pipe 1.3.0
Plots 1.36.6
PlutoUI 0.7.49
StatisticalRethinking 4.7.0
StatsFuns 1.1.1
StatsPlots 0.15.4
Turing 0.23.3

Run this Pluto Notebook

To run this page locally, download this file and open it with Pluto.jl.

The packages in JuliaActuary are open-source and liberally licensed (MIT License) to allow wide private and commercial usage of the packages, like the base Julia language and many other packages in the ecosystem. See terms of this site.