Stochastic claims projections demo

begin
    using CSV, DataFrames
    using MortalityTables, ActuaryUtilities
    using Dates
    using ThreadsX
    using BenchmarkTools
end

Define a datatype. Not strictly necessary, but will make extending the program with more functions easier.

Type annotations are optional, but providing them is able to coerce the values to be all plain bits (i.e. simple, non-referenced values like arrays are) when the type is constructed. This makes the whole data be stored in the stack and is an example of data-oriented design. It's much slower without the type annotations (~0.5 million policies per second, ~50x slower)

begin
    @enum Sex Female = 1 Male = 2 
    @enum Risk Standard = 1 Preferred = 2
end

struct Policy
    id::Int
    sex::Sex
    benefit_base::Float64
    COLA::Float64
    mode::Int
    issue_date::Date
    issue_age::Int
    risk::Risk
end

Load the data:

sample_csv_data = 
IOBuffer(
    raw"id,sex,benefit_base,COLA,mode,issue_date,issue_age,risk
        1,M,100000.0,0.03,12,1999-12-05,30,Std
        2,F,200000.0,0.03,12,1999-12-05,30,Pref"
)

IOBuffer(data=UInt8[...], readable=true, writable=false, seekable=true, append=false, size=138, maxsize=Inf, ptr=1, mark=-1)

policies = let 

    # read CSV directly into a dataframe
    # df = CSV.read("sample_inforce.csv",DataFrame) # use local string for notebook
    df = CSV.read(sample_csv_data,DataFrame)

    # map over each row and construct an array of Policy objects
    map(eachrow(df)) do row
        Policy(
            row.id,
            row.sex == "M" ? Male : Female,
            row.benefit_base,
            row.COLA,
            row.mode,
            row.issue_date,
            row.issue_age,
            row.risk == "Std" ? Standard : Preferred,
        )
    end

    
end

2-element Vector{Policy}:
 Policy(1, Male, 100000.0, 0.03, 12, Date("1999-12-05"), 30, Standard)
 Policy(2, Female, 200000.0, 0.03, 12, Date("1999-12-05"), 30, Preferred)

Define what mortality gets used:

mort = Dict(
    Male => MortalityTables.table(988).ultimate,
    Female => MortalityTables.table(992).ultimate,
)

Dict{Sex, OffsetArrays.OffsetVector{Float64, Vector{Float64}}} with 2 entries:
  Female => [0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.…
  Male   => [0.022571, 0.022571, 0.022571, 0.022571, 0.022571, 0.022571, 0.022571, 0.02…

function mortality(pol::Policy,params)
    return params.mortality[pol.sex]
end

mortality (generic function with 1 method)

This defines the core logic of the policy projection and will write the results to the given out container (here, a named tuple of arrays).

This is using a threaded approach where it could be operating on any of the computer's available threads, thus acheiving thread-based parallelism (as opposed to multi-processor (multi-machine) or GPU-based computation which requires formulating the problem a bit differently (array/matrix based). For the scale of computation here, I think I'd apply this model of parallelism.

function pol_project!(out,policy,params)
    # some starting values for the given policy
    dur = duration(policy.issue_date,params.val_date)
    start_age = policy.issue_age + dur - 1
    COLA_factor = (1+policy.COLA)
    cur_benefit = policy.benefit_base * COLA_factor ^ (dur-1)

    # get the right mortality vector
    qs = mortality(policy,params)

    # grab the current thread's id to write to results container without conflicting with other threads
    tid = Threads.threadid()
    
    ω = lastindex(qs) 

    # inbounds turns off bounds-checking, which makes hot loops faster but first write loop without it to ensure you don't create an error (will crash if you have the error without bounds checking)
    @inbounds for t in 1:min(params.proj_length,ω - start_age)
        
        q = qs[start_age + t] # get current mortality
        
        if (rand() < q) 
            return # if dead then just return and don't increment the results anymore
        else
            # pay benefit, add a life to the output count, and increment the benefit for next year
            out.benefits[t,tid] += cur_benefit
            out.lives[t,tid] += 1
            cur_benefit *= COLA_factor
        end
    end
end

pol_project! (generic function with 1 method)

Parameters for our projection:

using Random

params = (
    val_date = Date(2021,12,31),
    proj_length = 100,
    mortality = mort,
)

(val_date = Date("2021-12-31"), proj_length = 100, mortality = Dict{Main.var"workspace#5".Sex, OffsetArrays.OffsetVector{Float64, Vector{Float64}}}(Female => [0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.00745  …  0.376246, 0.386015, 0.393507, 0.398308, 0.4, 0.4, 0.4, 0.4, 0.4, 1.0], Male => [0.022571, 0.022571, 0.022571, 0.022571, 0.022571, 0.022571, 0.022571, 0.022571, 0.022571, 0.022571  …  0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4]))

Check the number of threads we're using:

Threads.nthreads()

function project(policies,params)
    threads = Threads.nthreads()
    benefits = zeros(params.proj_length,threads)
    lives = zeros(Int,params.proj_length,threads)
    out = (;benefits,lives)
    ThreadsX.foreach(policies) do pol
        pol_project!(out,pol,params)
    end
    map(x->vec(reduce(+,x,dims=2)),out)
end

project (generic function with 1 method)

Example of single projection:

project(repeat(policies,100_000),params)

(benefits = [5.630163290379324e10, 5.673567822864914e10, 5.714672103846871e10, 5.745305701188412e10, 5.7713185888191635e10, 5.7944767128179375e10, 5.8114506927619736e10, 5.8227198805188446e10, 5.823876482884627e10, 5.821812096136868e10  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [195249, 190473, 185677, 180687, 175588, 170604, 165574, 160542, 155397, 150278  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

Benchmarking

Using a Macbook Air laptop, about 30 million policies able to be stochastically projected per second:

let
    policies_to_benchmark = 3_000_000
    # adjust the `repeat` depending on how many policies are already in the array
    # to match the target number for the benchmark
    n = policies_to_benchmark ÷ length(policies)
    
    @benchmark project(p,r) setup=(p=repeat($policies,$n);r=$params)
end

BenchmarkTools.Trial: 4 samples with 1 evaluation.
 Range (min … max):  1.084 s …   1.265 s  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     1.175 s              ┊ GC (median):    0.00%
 Time  (mean ± σ):   1.175 s ± 74.064 ms  ┊ GC (mean ± σ):  0.00% ± 0.00%

  █                         █    █                        █  
  █▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█ ▁
  1.08 s         Histogram: frequency by time        1.26 s <

 Memory estimate: 16.30 KiB, allocs estimate: 144.

Stochastic Set

Loop through and calculate the reults n times (this is only running the two policies in the sample data" n times).

function stochastic_proj(policies,params,n)

    ThreadsX.map(1:n) do i
        project(policies,params)
    end
end

stochastic_proj (generic function with 1 method)

stoch = stochastic_proj(policies,params,1000)

1000-element Vector{NamedTuple{(:benefits, :lives), Tuple{Vector{Float64}, Vector{Int64}}}}:
 (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 728178.7413568987, 750024.1035976056  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 728178.7413568987, 750024.1035976056  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 431318.2535087685, 444257.8011140316, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 1, 1, 0, 0, 0, 0  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 728178.7413568987, 750024.1035976056  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 728178.7413568987, 750024.1035976056  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 (benefits = [383220.68177215644, 394717.3022253211, 406558.82129208074, 418755.5859308432, 431318.2535087685, 444257.8011140316, 457585.53514745255, 471313.1012018761, 485452.49423793243, 500016.0690650704  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 728178.7413568987, 750024.1035976056  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 ⋮
 (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 728178.7413568987, 750024.1035976056  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 728178.7413568987, 250008.0345325352  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 2, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 485452.49423793243, 500016.0690650704  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 1, 1  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 (benefits = [574831.0226582347, 394717.3022253211, 406558.82129208074, 418755.5859308432, 431318.2535087685, 444257.8011140316, 457585.53514745255, 471313.1012018761, 485452.49423793243, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 1, 1, 1, 1, 1, 1, 1, 1, 0  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 222128.9005570158, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 1, 0, 0, 0, 0  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
 (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 728178.7413568987, 750024.1035976056  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2  …  0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

using UnicodePlots

let
    v = [pv(0.03,s.benefits) for s in stoch]
    histogram(v,nbins=15)
end

                  ┌                                        ┐ 
   [0.0  , 2.0e6) ┤████▌ 19                                  
   [2.0e6, 4.0e6) ┤█████████████▍ 57                         
   [4.0e6, 6.0e6) ┤████████████████████▋ 88                  
   [6.0e6, 8.0e6) ┤█████████████████████████▊ 110            
   [8.0e6, 1.0e7) ┤███████████████████████████▏ 115          
   [1.0e7, 1.2e7) ┤███████████████████████████████████  149  
   [1.2e7, 1.4e7) ┤████████████████████████████████▊ 140     
   [1.4e7, 1.6e7) ┤██████████████████████████████▌ 130       
   [1.6e7, 1.8e7) ┤███████████████████▏ 81                   
   [1.8e7, 2.0e7) ┤███████████████▌ 66                       
   [2.0e7, 2.2e7) ┤███████▌ 32                               
   [2.2e7, 2.4e7) ┤██▍ 10                                    
   [2.4e7, 2.6e7) ┤▎ 1                                       
   [2.6e7, 2.8e7) ┤▌ 2                                       
                  └                                        ┘ 
                                   Frequency

Further Optimization

In no particular order:

the RNG could be made faster: https://bkamins.github.io/julialang/2020/11/20/rand.html
Could make the stochastic set distributed, but at the current speed the overhead of distributed computing is probably more time than it would save. Same thing with GPU projections
...

Built with Julia 1.8.5 and

ActuaryUtilities 3.2.1
BenchmarkTools 1.3.1
CSV 0.10.4
DataFrames 1.3.4
MortalityTables 2.3.0
ThreadsX 0.1.10
UnicodePlots 2.11.2

Run this Pluto Notebook

To run this page locally, download this file and open it with Pluto.jl.

The packages in JuliaActuary are open-source and liberally licensed (MIT License) to allow wide private and commercial usage of the packages, like the base Julia language and many other packages in the ecosystem. See terms of this site.