2023-09-20 · STA conntest for diff recording quality n durations

include("lib/Nto1.jl")
using Revise … ✔ (0.3 s)
using Units, Nto1AdEx, ConnectionTests, ConnTestEval, MemDiskCache … ✔ (0.4 s)
using StatsBase … ✔ (0.2 s)
N = 6500
duration = 10minutes
@time sim = Nto1AdEx.sim(N, duration);  # ceil_spikes is true by default
  3.204986 seconds (2.10 M allocations: 1.024 GiB, 2.30% gc time, 42.37% compilation time)

VI noise

(; Vₛ, Eₗ) = Nto1AdEx

function VI_sig(sim; spike_SNR = 10, spike_height = (Vₛ - Eₗ), seed=1)
    Random.seed!(seed)
    σ = spike_height / spike_SNR
    sig = copy(sim.V)
    sig .+= (σ .* randn(length(sig)))
    sig
end;

sig = VI_sig(sim);
include("lib/plot.jl")
import PythonCall … ✔ (2 s)
import PythonPlot … ✔ (3.5 s)
using Sciplotlib … ✔ (0.5 s)
using PhDPlots … ✔
plotsig(sig, [0,1000], ms, yunit=:mV);
../_images/2023-09-20__STA_conntest_for_diff_recording_quality_n_durations_6_0.png

Very good.

VI_sig(sim, spike_SNR=Inf) == sim.V
true

Excellent.

Clip the VI sig

include("lib/df.jl")
using DataFrames … ✔ (0.9 s)
ps = [95, 98, 99, 99.9, 99.95, 99.99, 100]
qs = percentile(sig, ps)
df = DataFrame("p" => ps, "V (mV)" => qs/mV)
7×2 DataFrame
RowpV (mV)
Float64Float64
195-36.5
298-32.1
399-29.1
499.9-19.1
5100-11.2
610048.6
710083.5

For the clean (no VI noise) signal, we clipped at -49.79 mV (which was there the 99 percentile)

If we do same:

clip!(sig, p = 99) = begin
    thr = percentile(sig, p)
    to_clip = sig .≥ thr
    sig[to_clip] .= thr
    sig
end;
sigc = clip!(copy(sig), 99);
tlim=[0,1000]
plotsig(sig , tlim, ms, yunit=:mV)
plotsig(sigc, tlim, ms, yunit=:mV, yunits_in=nothing);
../_images/2023-09-20__STA_conntest_for_diff_recording_quality_n_durations_17_0.png

Hm, we might loose some non-spike signal there?
So let’s choose sth stricter: (note that this is all for VI SNR 10, no systematic search.. But 10 is a realistic SNR, so, fine).

clip_pctile = 99.9
sigc = clip!(copy(sig), clip_pctile);
plotsig(sig , tlim, ms, yunit=:mV)
plotsig(sigc, tlim, ms, yunit=:mV, yunits_in=nothing);
../_images/2023-09-20__STA_conntest_for_diff_recording_quality_n_durations_20_0.png

Guess we could also set an absolute value, like clip at 0 or -20 mV or sth.
But in VI recordings there is no scale.

Maybe there’s some other way. Aren’t those spikes bunched/clustered? yeah! i suppose they are..
So we do.. uhm.. clustering on V distribution? I kinda like it..
“Oja’s algorithm”? ooooh yeah.
no wait, that’s sth else.
But there is a similar sounding algorithm, used in quantization/discretization.
Splitting sth (a distribution) in two..

Otsu’s method!

https://github.com/mdhumphries/HierarchicalConsensusClustering/blob/master/Helper_Functions/otsu1D.m

Test conns

cachedir = "2023-09-20__STA_conntest_for_diff_recording_quality_n_durations";
sim_ = CachedFunction(Nto1AdEx.sim, cachedir; disk=false, duration=10minutes, N)
CachedFunction(Nto1AdEx.sim, ThreadSafeDicts.ThreadSafeDict{Any, Any}(), false, true, "C:\\Users\\tfiers\\.julia\\MemDiskCache.jl\\2023-09-20__STA_conntest_for_diff_recording_quality_n_durations\\sim", Symbol[], Base.Pairs{Symbol, Real, Tuple{Symbol, Symbol}, NamedTuple{(:duration, :N), Tuple{Float64, Int64}}}(:duration => 600, :N => 6500))
MemDiskCache.set_dir(cachedir);
MemDiskCache.open_dir()
Process(`'C:\WINDOWS\system32\cmd.exe' /c start 'C:\Users\tfiers\.julia\MemDiskCache.jl\2023-09-20__STA_conntest_for_diff_recording_quality_n_durations'`, ProcessRunning)
function sim_and_test(; duration=10minutes, VI_SNR=Inf, seed=1)
    sim = sim_(; duration, seed)
    sig = VI_sig(sim, spike_SNR=VI_SNR)
    sigc = clip!(sig, clip_pctile)
    key = "sim_and_test" * string((; duration, VI_SNR, seed)) * "__rows"
    rows = @cached key test_high_firing_inputs(sim, sigc)
    # ↪ every row is a putative connection. (real type, t-val, presyn fr)
    df = DataFrame(rows)
    sweep = sweep_threshold(df)
    return (; sim, sigc, df, sweep)
end;
maxF1(sweep) = maximum(skipnan(sweep.F1));

↓ expected runtime: 8 SNRs x 5 seeds x 56 seconds = 37 minutes

SNRs = [Inf, 100, 40, 20, 10, 4, 2, 1];
rows = []
@time for VI_SNR in SNRs
    for seed in 1:5
        kw = (; duration, VI_SNR, seed)
        key = "sim_and_test$(kw)__sweep"
        row = @cached key begin
            (; sweep) = sim_and_test(; VI_SNR, seed)
            F1max = maxF1(sweep)
            (; AUC) = calc_AUROCs(sweep)
            row = (; VI_SNR, seed, F1max, AUC)
        end
        push!(rows, row)
        #println("\n", row, "\n"^2)
    end
end;
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = Inf, seed = 1)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = Inf, seed = 2)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = Inf, seed = 3)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = Inf, seed = 4)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = Inf, seed = 5)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 100, seed = 1)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 100, seed = 2)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 100, seed = 3)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 100, seed = 4)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 100, seed = 5)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 40, seed = 1)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 40, seed = 2)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 40, seed = 3)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 40, seed = 4)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 40, seed = 5)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 20, seed = 1)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 20, seed = 2)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 20, seed = 3)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 20, seed = 4)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 20, seed = 5)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 10, seed = 1)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 10, seed = 2)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 10, seed = 3)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 10, seed = 4)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 10, seed = 5)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 4, seed = 1)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 4, seed = 2)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 4, seed = 3)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 4, seed = 4)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 4, seed = 5)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 2, seed = 1)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 2, seed = 2)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 2, seed = 3)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 2, seed = 4)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 2, seed = 5)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 1, seed = 1)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 1, seed = 2)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 1, seed = 3)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 1, seed = 4)__sweep` from memory
[ Info: Loading `sim_and_test(duration = 600, VI_SNR = 1, seed = 5)__sweep` from memory
  0.038367 seconds (30.32 k allocations: 2.026 MiB)
df = df_snr = DataFrame(rows)
showsimple(df)
 VI_SNR  seed  F1max  AUC   
────────────────────────────
 Inf     1     0.829  0.796
 Inf     2     0.895  0.913
 Inf     3     0.851  0.834
 Inf     4     0.893  0.896
 Inf     5     0.838  0.859
 100     1     0.72   0.679
 100     2     0.794  0.78
 100     3     0.792  0.746
 100     4     0.812  0.789
 100     5     0.761  0.696
  40     1     0.604  0.455
  40     2     0.598  0.49
   ⋮      ⋮      ⋮      ⋮
   4     5     0.459  0.315
   2     1     0.398  0.233
   2     2     0.39   0.237
   2     3     0.451  0.292
   2     4     0.442  0.279
   2     5     0.447  0.306
   1     1     0.386  0.225
   1     2     0.375  0.227
   1     3     0.438  0.282
   1     4     0.424  0.264
   1     5     0.449  0.305
             17 rows omitted

Without clipping (from a first run, when I forgot to clip; only ceilin):

(VI_SNR = Inf, F1max = 0.697)
(VI_SNR = 10,  F1max = 0.469)
(VI_SNR = 4,   F1max = 0.404)
(VI_SNR = 2,   F1max = 0.398)
(VI_SNR = 1,   F1max = 0.390)

Note that, except at Inf snr, not much diff !

Plot

fmt(x) = isinf(x) ? "∞" : round(Int, x);
plot_F1(df, xcol; ax=newax(), title=true, kw...) = begin
    plot_dots_and_means(df, xcol, :F1max; ax, ytype=:frac, color_means=C0, ylabel=nothing, kw...)
    deemph_middle_ticks(ax.yaxis)
    t = hylabel(ax, L"Maximum $F_1$-score (mean of Recall & Precision)")
    title && ax.annotate("Connection detection performance of STA test", fontweight="bold",
                         xy=(0, 1.3), xycoords=t, va="bottom")
    return ax
end;
xlabel = "Voltage imaging noise (spike-SNR)"
xticklabels=fmt.(SNRs);
plot_F1(df_snr, :VI_SNR; xtype=:cat, xlabel, xticklabels);
savefig_phd("STA_perf_diff_snr_F1");
Saved at `../thesis/figs/STA_perf_diff_snr_F1.pdf`
../_images/2023-09-20__STA_conntest_for_diff_recording_quality_n_durations_38_1.png
plot_F1(df_snr, :VI_SNR; xtype=:cat, xlabel, xticklabels);
# savefig_phd("STA_perf_diff_snr_F1");
../_images/2023-09-20__STA_conntest_for_diff_recording_quality_n_durations_39_0.png
plot_AUC(df, xcol; ax=newax(), chance_level_loc=:left, title=true, kw...) = begin
    color = "gray"
    chance_level = 0.252  # See section below :)
    l = ax.axhline(chance_level; lw=1, ls="--", color)
    if chance_level_loc == :left
        xy = (0,0)
        ha = "left"
    else
        xy = (1,0)
        ha = "right"
    end
    ax.annotate("Chance level"; xycoords=l, xy, ha, va="bottom", color)
    
    plot_dots_and_means(df, xcol, :AUC; ax, ylim=[0,1], ylabel=nothing, kw...)
    deemph_middle_ticks(ax.yaxis)
    t = hylabel(ax, "Area under ROC curve (AUC)");
    title && ax.annotate("Connection detection performance of STA test", fontweight="bold",
                         xy=(0, 1.3), xycoords=t, va="bottom");
    return ax
end;
plot_AUC(df_snr, :VI_SNR; xtype=:cat, xticklabels, xlabel);
# savefig_phd("STA_perf_diff_snr_auc");
../_images/2023-09-20__STA_conntest_for_diff_recording_quality_n_durations_41_0.png

One fig. Tighter ylims.

fig, (ax_top, ax_btm) = plt.subplots(nrows=2, figsize=(mtw,1.1mtw), height_ratios=[1,1.2])
xlabel = ("Voltage imaging noise (spike-SNR)", :fontweight=>"bold")
xticklabels=fmt.(SNRs)
plot_F1(df_snr, :VI_SNR; xtype=:cat, ylim=[0.4, 1], ax=ax_top, title=false, xlabel=nothing)
rm_ticks_and_spine(ax_top, "bottom")
plot_AUC(df_snr, :VI_SNR; xtype=:cat, ylim=[0, 1], ax=ax_btm, title=false, xlabel, xticklabels);
fig.suptitle("Connection detection performance of STA test", x=0.5155)
plt.tight_layout(h_pad=1.7);
savefig_phd("STA_perf_diff_snr");
Saved at `../thesis/figs/STA_perf_diff_snr.pdf`
../_images/2023-09-20__STA_conntest_for_diff_recording_quality_n_durations_43_1.png

Comparing AUC and F1max directly.

gd = groupby(df_snr, :VI_SNR)
dfm = combine(gd, [:F1max, :AUC] .=> mean, renamecols=false)
8×3 DataFrame
RowVI_SNRF1maxAUC
Float64Float64Float64
1Inf0.8610.86
21000.7760.738
3400.6360.498
4200.5430.372
5100.4790.312
640.4390.279
720.4260.269
810.4140.261
ax=newax(figsize=(2.2, 2.2));
plot([0,1], [0,1], "--"; ax, c="lightgrey")
plot(dfm.AUC, dfm.F1max, ".-"; ax, aspect="equal", xlim=[0,1], ylim=[0,1],
     xlabel="AUC", ylabel=L"\max\ F_1", ytype=:fraction);
deemph_middle_ticks(ax);
../_images/2023-09-20__STA_conntest_for_diff_recording_quality_n_durations_46_0.png

Longer recording

plotsig(VI_sig(sim; spike_SNR=40), tlim, ms, yunit=:mV);
../_images/2023-09-20__STA_conntest_for_diff_recording_quality_n_durations_48_0.png

Sure, I can see that happening.

out = @cached "longer_rec" sim_and_test(duration=60minutes, VI_SNR=40);
[ Info: Loading `longer_rec` from memory
(; sweep) = out
maxF1(sweep)
0.879
calc_AUROCs(sweep)
(AUC = 0.893, AUCₑ = 0.821, AUCᵢ = 0.966)

Chance level

What is AUROC chance level for our weird ternary classification.

Assign classes randomly. Or rather give random connectedness-scores (t-vals).

random_test_result(Nₜ = 100) = begin
    conntype = repeat([:exc, :inh, :unc], inner=Nₜ)
    N = length(conntype)
    t = 2*rand(N) .- 1  # Uniform in [-1, 1]
    df = DataFrame(; conntype, t)
end

for _ in 1:5
    df = random_test_result()
    sweep = sweep_threshold(df)
    println(calc_AUROCs(sweep))
end
(AUC = 0.247, AUCₑ = 0.302, AUCᵢ = 0.192)
(AUC = 0.305, AUCₑ = 0.328, AUCᵢ = 0.282)
(AUC = 0.251, AUCₑ = 0.234, AUCᵢ = 0.268)
(AUC = 0.208, AUCₑ = 0.192, AUCᵢ = 0.224)
(AUC = 0.224, AUCₑ = 0.225, AUCᵢ = 0.224)

So chance level is even less than 0.333

ax = plotROC(sweep, legend_loc="upper center")
deemph_middle_ticks(ax);
../_images/2023-09-20__STA_conntest_for_diff_recording_quality_n_durations_57_0.png
random_test_AUC() = calc_AUROCs(sweep_threshold(random_test_result())).AUC
@time random_test_AUC()
  0.029802 seconds (72.67 k allocations: 11.245 MiB)
0.266
rm_from_memcache!("random_test_AUCs")
rm_from_disk("random_test_AUCs")
Random.seed!(1234)
random_test_AUCs = @cached "random_test_AUCs" [random_test_AUC() for _ in 1:300];
Running `[random_test_AUC() for _ = 1:300]` with key `random_test_AUCs` … 
Running `[random_test_AUC() for _ = 1:300]` with key `random_test_AUCs` ✔ (9.1 s)
Saving output at [C:\Users\tfiers\.julia\MemDiskCache.jl\2023-09-20__STA_conntest_for_diff_recording_quality_n_durations\random_test_AUCs.jld2] … ✔

(4 seconds)

(; ConnectionPatch) = mpl.patches;

function connect(axA, axB, xyA, xyB=xyA; lw=0.8, ls="--", color=Gray(0.4), kw...)
    fig = axA.figure
    con = ConnectionPatch(xyA, xyB, axA.transData, axB.transData; lw, ls, color=toRGBAtuple(color), kw...)
    fig.add_artist(con)
end;
fig, (ax_top, ax_btm) = plt.subplots(nrows=2, figsize=(4, 2.2))
distplot(random_test_AUCs; ax=ax_top, xlim=[0, 1], lines=false)
distplot(random_test_AUCs; ax=ax_btm, xlabel="Area under ROC curve (AUC)")
deemph_middle_ticks(ax_top)
xl, xr = ax_btm.get_xlim()
yb, yt = ax_top.get_ylim()
connect(ax_top, ax_btm, (xl, yb), (xl, yt))
connect(ax_top, ax_btm, (xr, yb), (xr, yt))
ax_top.vlines([xl, xr], yb, yt, color=toRGBAtuple(Gray(0.4)), lw=0.8)
fig.suptitle("Performance of random connection classifier")
plt.tight_layout(h_pad=2.6);
savefig_phd("AUC_chance_level")
Saved at `../thesis/figs/AUC_chance_level.pdf`
../_images/2023-09-20__STA_conntest_for_diff_recording_quality_n_durations_63_1.png
random_test_maxF1() = maxF1(sweep_threshold(random_test_result()))
random_test_maxF1()
0.375
Random.seed!(1234)
random_test_F1s = @cached "random_test_F1s" [random_test_maxF1() for _ in 1:300];
Running `[random_test_maxF1() for _ = 1:300]` with key `random_test_F1s` … 
Running `[random_test_maxF1() for _ = 1:300]` with key `random_test_F1s` ✔ (32.8 s)
Saving output at [C:\Users\tfiers\.julia\MemDiskCache.jl\2023-09-20__STA_conntest_for_diff_recording_quality_n_durations\random_test_F1s.jld2] … ✔
mean(random_test_F1s), median(random_test_F1s)
(0.403, 0.404)

Vary recording duration

durations = [10seconds, 30seconds, 1minute, 2minutes, 4minutes, 10minutes, 20minutes, 30minutes, 1hour]
VI_SNR = 40;
rows = []
@time for duration in durations
    for seed in 1:5
        kw = (; VI_SNR, duration, seed)
        key = "sim_and_test$(kw)__sweep_row"
        row = @cached key begin
            (; sweep) = sim_and_test(; kw...)
            F1max = maxF1(sweep)
            (; AUC) = calc_AUROCs(sweep)
            row = (; duration, seed, F1max, AUC)
        end
        push!(rows, row)
        # println("\n", row, "\n"^2)
    end
end;
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 10, seed = 1)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 10, seed = 2)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 10, seed = 3)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 10, seed = 4)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 10, seed = 5)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 30, seed = 1)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 30, seed = 2)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 30, seed = 3)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 30, seed = 4)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 30, seed = 5)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 60, seed = 1)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 60, seed = 2)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 60, seed = 3)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 60, seed = 4)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 60, seed = 5)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 120, seed = 1)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 120, seed = 2)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 120, seed = 3)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 120, seed = 4)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 120, seed = 5)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 240, seed = 1)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 240, seed = 2)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 240, seed = 3)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 240, seed = 4)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 240, seed = 5)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 600, seed = 1)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 600, seed = 2)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 600, seed = 3)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 600, seed = 4)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 600, seed = 5)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 1200, seed = 1)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 1200, seed = 2)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 1200, seed = 3)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 1200, seed = 4)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 1200, seed = 5)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 1800, seed = 1)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 1800, seed = 2)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 1800, seed = 3)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 1800, seed = 4)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 1800, seed = 5)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 3600, seed = 1)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 3600, seed = 2)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 3600, seed = 3)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 3600, seed = 4)__sweep_row` from memory
[ Info: Loading `sim_and_test(VI_SNR = 40, duration = 3600, seed = 5)__sweep_row` from memory
  0.410372 seconds (44.53 k allocations: 2.983 MiB)

Running all this took:

6687.8 / minutes
111

Holy damn, discovered a bug.
(For 10second duration: exc and inh test instant, but uncon suspiciously slow).
Reason: in get_trains_to_test (from lib/Nto1.jl), duration is a global var.
[This is one discovered disadvantage of a script vs a module!! Name leakage vs encapsulation].

ok, turned out this didn’t have an effect on the results. good.

df = df_duration = DataFrame(rows)
df.duration = df.duration / minutes
df
45×4 DataFrame
35 rows omitted
RowdurationseedF1maxAUC
Float64Int64Float64Float64
10.16710.460.303
20.16720.450.276
30.16730.4540.291
40.16740.4470.296
50.16750.4170.267
⋮⋮⋮⋮⋮
416010.8790.893
426020.8860.891
436030.8670.862
446040.8250.843
456050.8560.87
xscale = "log"
xlabel = "Recording duration"
xticklabels = ["6 sec", "1 min", "10 min", "1 hr 40"]
kw = (; xscale, xlabel, xticklabels);
plot_F1(df_duration, :duration; kw...);
# savefig_phd("STA_perf_diff_dur_F1")
../_images/2023-09-20__STA_conntest_for_diff_recording_quality_n_durations_75_0.png
plot_AUC(df_duration, :duration; kw..., chance_level_loc=:right);
# savefig_phd("STA_perf_diff_dur_auc")
../_images/2023-09-20__STA_conntest_for_diff_recording_quality_n_durations_76_0.png
xlabel = ("Recording duration", :fontweight=>"bold")
xscale = "log"
xticklabels = ["6 sec", "1 min", "10 min", "1 hr 40"]
fig, (ax_top, ax_btm) = plt.subplots(nrows=2, figsize=(mtw,1.1mtw), height_ratios=[1,1.2])
plot_F1(df_duration, :duration; xscale, ylim=[0.4, 1], ax=ax_top, title=false, xlabel=nothing)
rm_ticks_and_spine(ax_top, "bottom")
plot_AUC(df_duration, :duration;
         ylim=[0, 1], ax=ax_btm, title=false, xscale, xlabel, xticklabels, chance_level_loc=:right);
fig.suptitle("Connection detection performance of STA test", x=0.5155)
plt.tight_layout(h_pad=1.7);
savefig_phd("STA_perf_diff_rec_duration");
Saved at `../thesis/figs/STA_perf_diff_rec_duration.pdf`
../_images/2023-09-20__STA_conntest_for_diff_recording_quality_n_durations_77_1.png

Computation time

Read out from cache files on disk.

VI_SNR = 40;
rows = []
@time for duration in durations
    for seed in 1:5
        key = "sim_and_test" * string((; duration, VI_SNR, seed)) * "__rows"
        runtime = get_runtime(key)
        row = (; duration, seed, runtime)
        push!(rows, row)
        # println("\n", row, "\n"^2)
    end
end;
  2.007167 seconds (2.14 M allocations: 127.208 MiB, 1.46% gc time)
df = df_runtimes = DataFrame(rows)
rename!(df, :duration => :sim_duration)
df.sim_duration .= df.sim_duration / minutes
df.runtime .= df.runtime / minutes

dfm = combine(groupby(df, :sim_duration), :runtime => mean => :runtime)
9×2 DataFrame
Rowsim_durationruntime
Float64Float64
10.1670.0299
20.50.0679
310.134
420.316
540.68
6101.68
7203.36
8305.07
96010.8
using Printf

fmt((x,y)::NTuple{2, Float64}) = fmt(fmt(x), fmt(y))
fmt(x::String, y::String) = "($x$y)"
fmt(minutes::Float64) = (
    minutes < 1   ?  str(60*minutes) * " sec"  :
    minutes  60  ?  str(minutes/60) * " hr"   :
                     str(minutes)    * " min"
)
str(num) = @sprintf "%.2g" num;
ax = newax()
x1 = df.sim_duration[1]
x2 = df.sim_duration[end]
ax.plot([x1,x2], [x1,x2], "--", c="lightgray");
plot_dots_and_means(
    df_runtimes, :sim_duration, :runtime; xscale="log", yscale="log", ax,
    xticklabels,
    xlabel = (xlabel, :fontweight => "bold"),
    ylabel = "Compute time (minutes)",
)
xy(i) = (dfm.sim_duration[i], dfm.runtime[i])
ax.annotate(xy=xy(1), text=fmt(xy(1)), va="top", textcoords="offset points", xytext=(3, -3), color="gray")
ax.annotate(xy=xy(6), text=fmt(xy(6)), va="top", textcoords="offset points", xytext=(3, -3), color="gray")
ax.annotate(xy=xy(9), text=fmt(xy(9)), va="top", textcoords="offset points", xytext=(3, -3), color="gray")
axtitle(ax,
    "Time taken to test 300 connections using STAs",
    "with 100 shuffle-STAs per tested connection",
)
savefig_phd("STA_compute_time")
Saved at `../thesis/figs/STA_compute_time.pdf`
../_images/2023-09-20__STA_conntest_for_diff_recording_quality_n_durations_83_1.png

Compute time detail

  1. How long does one tested conn take (101 STAs).

  2. How long does one calc_STA take.

And where is this time spent (profiling).