begin
using CSV, DataFrames
using MortalityTables, ActuaryUtilities
using Dates
using ThreadsX
using BenchmarkTools
end
Define a datatype. Not strictly necessary, but will make extending the program with more functions easier.
Type annotations are optional, but providing them is able to coerce the values to be all plain bits (i.e. simple, non-referenced values like arrays are) when the type is constructed. This makes the whole data be stored in the stack and is an example of data-oriented design. It's much slower without the type annotations (~0.5 million policies per second, ~50x slower)
begin
@enum Sex Female = 1 Male = 2
@enum Risk Standard = 1 Preferred = 2
end
struct Policy
id::Int
sex::Sex
benefit_base::Float64
COLA::Float64
mode::Int
issue_date::Date
issue_age::Int
risk::Risk
end
Load the data:
sample_csv_data =
IOBuffer(
raw"id,sex,benefit_base,COLA,mode,issue_date,issue_age,risk
1,M,100000.0,0.03,12,1999-12-05,30,Std
2,F,200000.0,0.03,12,1999-12-05,30,Pref"
)
IOBuffer(data=UInt8[...], readable=true, writable=false, seekable=true, append=false, size=138, maxsize=Inf, ptr=1, mark=-1)
policies = let
# read CSV directly into a dataframe
# df = CSV.read("sample_inforce.csv",DataFrame) # use local string for notebook
df = CSV.read(sample_csv_data,DataFrame)
# map over each row and construct an array of Policy objects
map(eachrow(df)) do row
Policy(
row.id,
row.sex == "M" ? Male : Female,
row.benefit_base,
row.COLA,
row.mode,
row.issue_date,
row.issue_age,
row.risk == "Std" ? Standard : Preferred,
)
end
end
2-element Vector{Policy}: Policy(1, Male, 100000.0, 0.03, 12, Date("1999-12-05"), 30, Standard) Policy(2, Female, 200000.0, 0.03, 12, Date("1999-12-05"), 30, Preferred)
Define what mortality gets used:
mort = Dict(
Male => MortalityTables.table(988).ultimate,
Female => MortalityTables.table(992).ultimate,
)
Dict{Sex, OffsetArrays.OffsetVector{Float64, Vector{Float64}}} with 2 entries: Female => [0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.… Male => [0.022571, 0.022571, 0.022571, 0.022571, 0.022571, 0.022571, 0.022571, 0.02…
function mortality(pol::Policy,params)
return params.mortality[pol.sex]
end
mortality (generic function with 1 method)
This defines the core logic of the policy projection and will write the results to the given out
container (here, a named tuple of arrays).
This is using a threaded approach where it could be operating on any of the computer's available threads, thus acheiving thread-based parallelism (as opposed to multi-processor (multi-machine) or GPU-based computation which requires formulating the problem a bit differently (array/matrix based). For the scale of computation here, I think I'd apply this model of parallelism.
function pol_project!(out,policy,params)
# some starting values for the given policy
dur = duration(policy.issue_date,params.val_date)
start_age = policy.issue_age + dur - 1
COLA_factor = (1+policy.COLA)
cur_benefit = policy.benefit_base * COLA_factor ^ (dur-1)
# get the right mortality vector
qs = mortality(policy,params)
# grab the current thread's id to write to results container without conflicting with other threads
tid = Threads.threadid()
ω = lastindex(qs)
# inbounds turns off bounds-checking, which makes hot loops faster but first write loop without it to ensure you don't create an error (will crash if you have the error without bounds checking)
@inbounds for t in 1:min(params.proj_length,ω - start_age)
q = qs[start_age + t] # get current mortality
if (rand() < q)
return # if dead then just return and don't increment the results anymore
else
# pay benefit, add a life to the output count, and increment the benefit for next year
out.benefits[t,tid] += cur_benefit
out.lives[t,tid] += 1
cur_benefit *= COLA_factor
end
end
end
pol_project! (generic function with 1 method)
Parameters for our projection:
using Random
params = (
val_date = Date(2021,12,31),
proj_length = 100,
mortality = mort,
)
(val_date = Date("2021-12-31"), proj_length = 100, mortality = Dict{Main.var"workspace#5".Sex, OffsetArrays.OffsetVector{Float64, Vector{Float64}}}(Female => [0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.00745, 0.00745 … 0.376246, 0.386015, 0.393507, 0.398308, 0.4, 0.4, 0.4, 0.4, 0.4, 1.0], Male => [0.022571, 0.022571, 0.022571, 0.022571, 0.022571, 0.022571, 0.022571, 0.022571, 0.022571, 0.022571 … 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4]))
Check the number of threads we're using:
Threads.nthreads()
2
function project(policies,params)
threads = Threads.nthreads()
benefits = zeros(params.proj_length,threads)
lives = zeros(Int,params.proj_length,threads)
out = (;benefits,lives)
ThreadsX.foreach(policies) do pol
pol_project!(out,pol,params)
end
map(x->vec(reduce(+,x,dims=2)),out)
end
project (generic function with 1 method)
Example of single projection:
project(repeat(policies,100_000),params)
(benefits = [5.630163290379324e10, 5.673567822864914e10, 5.714672103846871e10, 5.745305701188412e10, 5.7713185888191635e10, 5.7944767128179375e10, 5.8114506927619736e10, 5.8227198805188446e10, 5.823876482884627e10, 5.821812096136868e10 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [195249, 190473, 185677, 180687, 175588, 170604, 165574, 160542, 155397, 150278 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
Using a Macbook Air laptop, about 30 million policies able to be stochastically projected per second:
let
policies_to_benchmark = 3_000_000
# adjust the `repeat` depending on how many policies are already in the array
# to match the target number for the benchmark
n = policies_to_benchmark ÷ length(policies)
@benchmark project(p,r) setup=(p=repeat($policies,$n);r=$params)
end
BenchmarkTools.Trial: 4 samples with 1 evaluation. Range (min … max): 1.084 s … 1.265 s ┊ GC (min … max): 0.00% … 0.00% Time (median): 1.175 s ┊ GC (median): 0.00% Time (mean ± σ): 1.175 s ± 74.064 ms ┊ GC (mean ± σ): 0.00% ± 0.00% █ █ █ █ █▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█ ▁ 1.08 s Histogram: frequency by time 1.26 s < Memory estimate: 16.30 KiB, allocs estimate: 144.
Loop through and calculate the reults n
times (this is only running the two policies in the sample data" n
times).
function stochastic_proj(policies,params,n)
ThreadsX.map(1:n) do i
project(policies,params)
end
end
stochastic_proj (generic function with 1 method)
stoch = stochastic_proj(policies,params,1000)
1000-element Vector{NamedTuple{(:benefits, :lives), Tuple{Vector{Float64}, Vector{Int64}}}}: (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 728178.7413568987, 750024.1035976056 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 728178.7413568987, 750024.1035976056 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 431318.2535087685, 444257.8011140316, 0.0, 0.0, 0.0, 0.0 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 1, 1, 0, 0, 0, 0 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 728178.7413568987, 750024.1035976056 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 728178.7413568987, 750024.1035976056 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) (benefits = [383220.68177215644, 394717.3022253211, 406558.82129208074, 418755.5859308432, 431318.2535087685, 444257.8011140316, 457585.53514745255, 471313.1012018761, 485452.49423793243, 500016.0690650704 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 728178.7413568987, 750024.1035976056 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) ⋮ (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 728178.7413568987, 750024.1035976056 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 728178.7413568987, 250008.0345325352 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 2, 1 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 485452.49423793243, 500016.0690650704 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 1, 1 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) (benefits = [574831.0226582347, 394717.3022253211, 406558.82129208074, 418755.5859308432, 431318.2535087685, 444257.8011140316, 457585.53514745255, 471313.1012018761, 485452.49423793243, 0.0 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 1, 1, 1, 1, 1, 1, 1, 1, 0 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 222128.9005570158, 0.0, 0.0, 0.0, 0.0 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 1, 0, 0, 0, 0 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) (benefits = [574831.0226582347, 592075.9533379817, 609838.2319381211, 628133.3788962648, 646977.3802631528, 666386.7016710474, 686378.3027211789, 706969.6518028142, 728178.7413568987, 750024.1035976056 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], lives = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
using UnicodePlots
let
v = [pv(0.03,s.benefits) for s in stoch]
histogram(v,nbins=15)
end
┌ ┐ [0.0 , 2.0e6) ┤████▌ 19 [2.0e6, 4.0e6) ┤█████████████▍ 57 [4.0e6, 6.0e6) ┤████████████████████▋ 88 [6.0e6, 8.0e6) ┤█████████████████████████▊ 110 [8.0e6, 1.0e7) ┤███████████████████████████▏ 115 [1.0e7, 1.2e7) ┤███████████████████████████████████ 149 [1.2e7, 1.4e7) ┤████████████████████████████████▊ 140 [1.4e7, 1.6e7) ┤██████████████████████████████▌ 130 [1.6e7, 1.8e7) ┤███████████████████▏ 81 [1.8e7, 2.0e7) ┤███████████████▌ 66 [2.0e7, 2.2e7) ┤███████▌ 32 [2.2e7, 2.4e7) ┤██▍ 10 [2.4e7, 2.6e7) ┤▎ 1 [2.6e7, 2.8e7) ┤▌ 2 └ ┘ Frequency
In no particular order:
the RNG could be made faster: https://bkamins.github.io/julialang/2020/11/20/rand.html
Could make the stochastic set distributed, but at the current speed the overhead of distributed computing is probably more time than it would save. Same thing with GPU projections
...
Built with Julia 1.8.5 and
ActuaryUtilities 3.2.1To run this page locally, download this file and open it with Pluto.jl.