How to analyze Julia memory allocation and code coverage results

I am writing a package for Bayesian inference using Gibbs sampling. Since these methods are generally computationally expensive, I am very concerned about the performance of my code; in fact, speed is the reason I moved from Python to Julia.
After implementing the Dirichlet process mixture model, I analyzed the code using Coverage.jl and the --track-allocation=user command-line option.
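For reference, this is roughly the workflow used to produce the .cov and .mem files (a minimal sketch: run_model() is a placeholder for whatever script exercises the sampler, and analyze_malloc is Coverage.jl's helper for ranking lines by allocated bytes):

# start Julia with both options enabled:
#   julia --code-coverage=user --track-allocation=user

include("DPM.jl")            # load the code shown below
run_model()                  # warm-up run, so compilation happens here

Profile.clear_malloc_data()  # discard allocations caused by compilation
                             # (on newer Julia, `using Profile` is needed first)
run_model()                  # measured run; only its allocations are recorded

# after exiting Julia, *.cov and *.mem files sit next to the sources;
# in a fresh session, Coverage.jl can rank lines by allocated bytes:
using Coverage
analyze_malloc(".")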
Here are the coverage results:
- #=
- DPM
-
- Dirichlet Process Mixture Models
-
- 25/08/2015
- Adham Beyki, [email protected]
-
- =#
-
- type DPM{T}
- bayesian_component::T
- K::Int64
- aa::Float64
- a1::Float64
- a2::Float64
- K_hist::Vector{Int64}
- K_zz_dict::Dict{Int64, Vector{Int64}}
-
- DPM{T}(c::T, K::Int64, aa::Float64, a1::Float64, a2::Float64) = new(c, K, aa, a1, a2,
- Int64[], (Int64 => Vector{Int64})[])
- end
1 DPM{T}(c::T, K::Int64, aa::Real, a1::Real, a2::Real) = DPM{typeof(c)}(c, K, convert(Float64, aa),
- convert(Float64, a1), convert(Float64, a2))
-
- function Base.show(io::IO, dpm::DPM)
- println(io, "Dirichlet Mixture Model with $(dpm.K) $(typeof(dpm.bayesian_component)) components")
- end
-
- function initialize_gibbs_sampler!(dpm::DPM, zz::Vector{Int64})
- # populates the cluster labels randomly
1 zz[:] = rand(1:dpm.K, length(zz))
- end
-
- function DPM_sample_hyperparam(aa::Float64, a1::Float64, a2::Float64, K::Int64, NN::Int64, iters::Int64)
-
- # resampling concentration parameter based on Escobar and West 1995
352 for n = 1:iters
3504 eta = rand(Distributions.Beta(aa+1, NN))
3504 rr = (a1+K-1)/(NN*(a2-log(NN)))
3504 pi_eta = rr/(1+rr)
-
3504 if rand() < pi_eta
0 aa = rand(Distributions.Gamma(a1+K, 1/(a2-log(eta))))
- else
3504 aa = rand(Distributions.Gamma(a1+K-1, 1/(a2-log(eta))))
- end
- end
352 aa
- end
-
- function DPM_sample_pp{T1, T2}(
- bayesian_components::Vector{T1},
- xx::T2,
- nn::Vector{Float64},
- pp::Vector{Float64},
- aa::Float64)
-
1760000 K = length(nn)
1760000 @inbounds for kk = 1:K
11384379 pp[kk] = log(nn[kk]) + logpredictive(bayesian_components[kk], xx)
- end
1760000 pp[K+1] = log(aa) + logpredictive(bayesian_components[K+1], xx)
1760000 normalize_pp!(pp, K+1)
1760000 return sample(pp[1:K+1])
- end
-
-
- function collapsed_gibbs_sampler!{T1, T2}(
- dpm::DPM{T1},
- xx::Vector{T2},
- zz::Vector{Int64},
- n_burnins::Int64, n_lags::Int64, n_samples::Int64, n_internals::Int64; max_clusters::Int64=100)
-
-
2 NN = length(xx) # number of data points
2 nn = zeros(Float64, dpm.K) # count array
2 n_iterations = n_burnins + (n_samples)*(n_lags+1)
2 bayesian_components = [deepcopy(dpm.bayesian_component) for k = 1:dpm.K+1]
2 dpm.K_hist = zeros(Int64, n_iterations)
2 pp = zeros(Float64, max_clusters)
-
2 tic()
2 for ii = 1:NN
10000 kk = zz[ii]
10000 additem(bayesian_components[kk], xx[ii])
10000 nn[kk] += 1
- end
2 dpm.K_hist[1] = dpm.K
2 elapsed_time = toq()
-
2 for iteration = 1:n_iterations
-
352 println("iteration: $iteration, KK: $(dpm.K), KK mode: $(indmax(hist(dpm.K_hist,
- 0.5:maximum(dpm.K_hist)+0.5)[2])), elapsed time: $elapsed_time")
-
352 tic()
352 @inbounds for ii = 1:NN
1760000 kk = zz[ii]
1760000 nn[kk] -= 1
1760000 delitem(bayesian_components[kk], xx[ii])
-
- # remove the cluster if empty
1760000 if nn[kk] == 0
166 println("\tcomponent $kk has become inactive")
166 splice!(nn, kk)
166 splice!(bayesian_components, kk)
166 dpm.K -= 1
-
- # shifting the labels one cluster back
830166 idx = find(x -> x>kk, zz)
166 zz[idx] -= 1
- end
-
1760000 kk = DPM_sample_pp(bayesian_components, xx[ii], nn, pp, dpm.aa)
-
1760000 if kk == dpm.K+1
171 println("\tcomponent $kk activated.")
171 push!(bayesian_components, deepcopy(dpm.bayesian_component))
171 push!(nn, 0)
171 dpm.K += 1
- end
-
1760000 zz[ii] = kk
1760000 nn[kk] += 1
1760000 additem(bayesian_components[kk], xx[ii])
- end
-
352 dpm.aa = DPM_sample_hyperparam(dpm.aa, dpm.a1, dpm.a2, dpm.K, NN, n_internals)
352 dpm.K_hist[iteration] = dpm.K
352 dpm.K_zz_dict[dpm.K] = deepcopy(zz)
352 elapsed_time = toq()
- end
- end
-
- function truncated_gibbs_sampler{T1, T2}(dpm::DPM{T1}, xx::Vector{T2}, zz::Vector{Int64},
- n_burnins::Int64, n_lags::Int64, n_samples::Int64, n_internals::Int64, K_truncation::Int64)
-
- NN = length(xx) # number of data points
- nn = zeros(Int64, K_truncation) # count array
- bayesian_components = [deepcopy(dpm.bayesian_component) for k = 1:K_truncation]
- n_iterations = n_burnins + (n_samples)*(n_lags+1)
- dpm.K_hist = zeros(Int64, n_iterations)
- states = (ASCIIString => Int64)[]
- n_states = 0
-
- tic()
- for ii = 1:NN
- kk = zz[ii]
- additem(bayesian_components[kk], xx[ii])
- nn[kk] += 1
- end
- dpm.K_hist[1] = dpm.K
-
- # constructing the sticks
- beta_VV = rand(Distributions.Beta(1.0, dpm.aa), K_truncation)
- beta_VV[end] = 1.0
- π = ones(Float64, K_truncation)
- π[2:end] = 1 - beta_VV[1:K_truncation-1]
- π = log(beta_VV) + log(cumprod(π))
-
- elapsed_time = toq()
-
- for iteration = 1:n_iterations
-
- println("iteration: $iteration, # active components: $(length(findn(nn)[1])), mode: $(indmax(hist(dpm.K_hist,
- 0.5:maximum(dpm.K_hist)+0.5)[2])), elapsed time: $elapsed_time \n", nn)
-
- tic()
- for ii = 1:NN
- kk = zz[ii]
- nn[kk] -= 1
- delitem(bayesian_components[kk], xx[ii])
-
- # resampling label
- pp = zeros(Float64, K_truncation)
- for kk = 1:K_truncation
- pp[kk] = π[kk] + logpredictive(bayesian_components[kk], xx[ii])
- end
- pp = exp(pp - maximum(pp))
- pp /= sum(pp)
-
- # sample from pp
- kk = sampleindex(pp)
- zz[ii] = kk
- nn[kk] += 1
- additem(bayesian_components[kk], xx[ii])
-
- for kk = 1:K_truncation-1
- gamma1 = 1 + nn[kk]
- gamma2 = dpm.aa + sum(nn[kk+1:end])
- beta_VV[kk] = rand(Distributions.Beta(gamma1, gamma2))
- end
- beta_VV[end] = 1.0
- π = ones(Float64, K_truncation)
- π[2:end] = 1 - beta_VV[1:K_truncation-1]
- π = log(beta_VV) + log(cumprod(π))
-
- # resampling concentration parameter based on Escobar and West 1995
- for internal_iters = 1:n_internals
- eta = rand(Distributions.Beta(dpm.aa+1, NN))
- rr = (dpm.a1+dpm.K-1)/(NN*(dpm.a2-log(NN)))
- pi_eta = rr/(1+rr)
-
- if rand() < pi_eta
- dpm.aa = rand(Distributions.Gamma(dpm.a1+dpm.K, 1/(dpm.a2-log(eta))))
- else
- dpm.aa = rand(Distributions.Gamma(dpm.a1+dpm.K-1, 1/(dpm.a2-log(eta))))
- end
- end
- end
-
- nn_string = nn2string(nn)
- if !haskey(states, nn_string)
- n_states += 1
- states[nn_string] = n_states
- end
- dpm.K_hist[iteration] = states[nn_string]
- dpm.K_zz_dict[states[nn_string]] = deepcopy(zz)
- elapsed_time = toq()
- end
- return states
- end
-
-
- function posterior{T1, T2}(dpm::DPM{T1}, xx::Vector{T2}, K::Int64, K_truncation::Int64=0)
2 n_components = 0
1 if K_truncation == 0
1 n_components = K
- else
0 n_components = K_truncation
- end
-
1 bayesian_components = [deepcopy(dpm.bayesian_component) for kk=1:n_components]
1 zz = dpm.K_zz_dict[K]
-
1 NN = length(xx)
1 nn = zeros(Int64, n_components)
-
1 for ii = 1:NN
5000 kk = zz[ii]
5000 additem(bayesian_components[kk], xx[ii])
5000 nn[kk] += 1
- end
-
1 return([posterior(bayesian_components[kk]) for kk=1:n_components], nn)
- end
-
And here is the memory allocation output:
- #=
- DPM
-
- Dirichlet Process Mixture Models
-
- 25/08/2015
- Adham Beyki, [email protected]
-
- =#
-
- type DPM{T}
- bayesian_component::T
- K::Int64
- aa::Float64
- a1::Float64
- a2::Float64
- K_hist::Vector{Int64}
- K_zz_dict::Dict{Int64, Vector{Int64}}
-
- DPM{T}(c::T, K::Int64, aa::Float64, a1::Float64, a2::Float64) = new(c, K, aa, a1, a2,
- Int64[], (Int64 => Vector{Int64})[])
- end
0 DPM{T}(c::T, K::Int64, aa::Real, a1::Real, a2::Real) = DPM{typeof(c)}(c, K, convert(Float64, aa),
- convert(Float64, a1), convert(Float64, a2))
-
- function Base.show(io::IO, dpm::DPM)
- println(io, "Dirichlet Mixture Model with $(dpm.K) $(typeof(dpm.bayesian_component)) components")
- end
-
- function initialize_gibbs_sampler!(dpm::DPM, zz::Vector{Int64})
- # populates the cluster labels randomly
0 zz[:] = rand(1:dpm.K, length(zz))
- end
-
- function DPM_sample_hyperparam(aa::Float64, a1::Float64, a2::Float64, K::Int64, NN::Int64, iters::Int64)
-
- # resampling concentration parameter based on Escobar and West 1995
0 for n = 1:iters
0 eta = rand(Distributions.Beta(aa+1, NN))
0 rr = (a1+K-1)/(NN*(a2-log(NN)))
0 pi_eta = rr/(1+rr)
-
0 if rand() < pi_eta
0 aa = rand(Distributions.Gamma(a1+K, 1/(a2-log(eta))))
- else
0 aa = rand(Distributions.Gamma(a1+K-1, 1/(a2-log(eta))))
- end
- end
0 aa
- end
-
- function DPM_sample_pp{T1, T2}(
- bayesian_components::Vector{T1},
- xx::T2,
- nn::Vector{Float64},
- pp::Vector{Float64},
- aa::Float64)
-
0 K = length(nn)
0 @inbounds for kk = 1:K
0 pp[kk] = log(nn[kk]) + logpredictive(bayesian_components[kk], xx)
- end
0 pp[K+1] = log(aa) + logpredictive(bayesian_components[K+1], xx)
0 normalize_pp!(pp, K+1)
0 return sample(pp[1:K+1])
- end
-
-
- function collapsed_gibbs_sampler!{T1, T2}(
- dpm::DPM{T1},
- xx::Vector{T2},
- zz::Vector{Int64},
- n_burnins::Int64, n_lags::Int64, n_samples::Int64, n_internals::Int64; max_clusters::Int64=100)
-
-
191688 NN = length(xx) # number of data points
96 nn = zeros(Float64, dpm.K) # count array
0 n_iterations = n_burnins + (n_samples)*(n_lags+1)
384 bayesian_components = [deepcopy(dpm.bayesian_component) for k = 1:dpm.K+1]
2864 dpm.K_hist = zeros(Int64, n_iterations)
176 pp = zeros(Float64, max_clusters)
-
48 tic()
0 for ii = 1:NN
0 kk = zz[ii]
0 additem(bayesian_components[kk], xx[ii])
0 nn[kk] += 1
- end
0 dpm.K_hist[1] = dpm.K
0 elapsed_time = toq()
-
0 for iteration = 1:n_iterations
-
5329296 println("iteration: $iteration, KK: $(dpm.K), KK mode: $(indmax(hist(dpm.K_hist,
- 0.5:maximum(dpm.K_hist)+0.5)[2])), elapsed time: $elapsed_time")
-
16800 tic()
28000000 @inbounds for ii = 1:NN
0 kk = zz[ii]
0 nn[kk] -= 1
0 delitem(bayesian_components[kk], xx[ii])
-
- # remove the cluster if empty
0 if nn[kk] == 0
161880 println("\tcomponent $kk has become inactive")
0 splice!(nn, kk)
0 splice!(bayesian_components, kk)
0 dpm.K -= 1
-
- # shifting the labels one cluster back
69032 idx = find(x -> x>kk, zz)
42944 zz[idx] -= 1
- end
-
0 kk = DPM_sample_pp(bayesian_components, xx[ii], nn, pp, dpm.aa)
-
0 if kk == dpm.K+1
158976 println("\tcomponent $kk activated.")
14144 push!(bayesian_components, deepcopy(dpm.bayesian_component))
4872 push!(nn, 0)
0 dpm.K += 1
- end
-
0 zz[ii] = kk
0 nn[kk] += 1
0 additem(bayesian_components[kk], xx[ii])
- end
-
0 dpm.aa = DPM_sample_hyperparam(dpm.aa, dpm.a1, dpm.a2, dpm.K, NN, n_internals)
0 dpm.K_hist[iteration] = dpm.K
14140000 dpm.K_zz_dict[dpm.K] = deepcopy(zz)
0 elapsed_time = toq()
- end
- end
-
- function truncated_gibbs_sampler{T1, T2}(dpm::DPM{T1}, xx::Vector{T2}, zz::Vector{Int64},
- n_burnins::Int64, n_lags::Int64, n_samples::Int64, n_internals::Int64, K_truncation::Int64)
-
- NN = length(xx) # number of data points
- nn = zeros(Int64, K_truncation) # count array
- bayesian_components = [deepcopy(dpm.bayesian_component) for k = 1:K_truncation]
- n_iterations = n_burnins + (n_samples)*(n_lags+1)
- dpm.K_hist = zeros(Int64, n_iterations)
- states = (ASCIIString => Int64)[]
- n_states = 0
-
- tic()
- for ii = 1:NN
- kk = zz[ii]
- additem(bayesian_components[kk], xx[ii])
- nn[kk] += 1
- end
- dpm.K_hist[1] = dpm.K
-
- # constructing the sticks
- beta_VV = rand(Distributions.Beta(1.0, dpm.aa), K_truncation)
- beta_VV[end] = 1.0
- π = ones(Float64, K_truncation)
- π[2:end] = 1 - beta_VV[1:K_truncation-1]
- π = log(beta_VV) + log(cumprod(π))
-
- elapsed_time = toq()
-
- for iteration = 1:n_iterations
-
- println("iteration: $iteration, # active components: $(length(findn(nn)[1])), mode: $(indmax(hist(dpm.K_hist,
- 0.5:maximum(dpm.K_hist)+0.5)[2])), elapsed time: $elapsed_time \n", nn)
-
- tic()
- for ii = 1:NN
- kk = zz[ii]
- nn[kk] -= 1
- delitem(bayesian_components[kk], xx[ii])
-
- # resampling label
- pp = zeros(Float64, K_truncation)
- for kk = 1:K_truncation
- pp[kk] = π[kk] + logpredictive(bayesian_components[kk], xx[ii])
- end
- pp = exp(pp - maximum(pp))
- pp /= sum(pp)
-
- # sample from pp
- kk = sampleindex(pp)
- zz[ii] = kk
- nn[kk] += 1
- additem(bayesian_components[kk], xx[ii])
-
- for kk = 1:K_truncation-1
- gamma1 = 1 + nn[kk]
- gamma2 = dpm.aa + sum(nn[kk+1:end])
- beta_VV[kk] = rand(Distributions.Beta(gamma1, gamma2))
- end
- beta_VV[end] = 1.0
- π = ones(Float64, K_truncation)
- π[2:end] = 1 - beta_VV[1:K_truncation-1]
- π = log(beta_VV) + log(cumprod(π))
-
- # resampling concentration parameter based on Escobar and West 1995
- for internal_iters = 1:n_internals
- eta = rand(Distributions.Beta(dpm.aa+1, NN))
- rr = (dpm.a1+dpm.K-1)/(NN*(dpm.a2-log(NN)))
- pi_eta = rr/(1+rr)
-
- if rand() < pi_eta
- dpm.aa = rand(Distributions.Gamma(dpm.a1+dpm.K, 1/(dpm.a2-log(eta))))
- else
- dpm.aa = rand(Distributions.Gamma(dpm.a1+dpm.K-1, 1/(dpm.a2-log(eta))))
- end
- end
- end
-
- nn_string = nn2string(nn)
- if !haskey(states, nn_string)
- n_states += 1
- states[nn_string] = n_states
- end
- dpm.K_hist[iteration] = states[nn_string]
- dpm.K_zz_dict[states[nn_string]] = deepcopy(zz)
- elapsed_time = toq()
- end
- return states
- end
-
-
- function posterior{T1, T2}(dpm::DPM{T1}, xx::Vector{T2}, K::Int64, K_truncation::Int64=0)
0 n_components = 0
0 if K_truncation == 0
0 n_components = K
- else
0 n_components = K_truncation
- end
-
0 bayesian_components = [deepcopy(dpm.bayesian_component) for kk=1:n_components]
0 zz = dpm.K_zz_dict[K]
-
0 NN = length(xx)
0 nn = zeros(Int64, n_components)
-
0 for ii = 1:NN
0 kk = zz[ii]
0 additem(bayesian_components[kk], xx[ii])
0 nn[kk] += 1
- end
-
0 return([posterior(bayesian_components[kk]) for kk=1:n_components], nn)
- end
-
What I don't seem to understand is why, for example, a simple assignment that runs only twice allocates 191688 units (I assume the unit is bytes, but I'm not sure).
.cov:
2 NN = length(xx) # number of data points
.mem:
191688 NN = length(xx) # number of data points
Or this one, which is even worse:
.cov:
352 @inbounds for ii = 1:NN
.mem:
28000000 @inbounds for ii = 1:NN
What is the value of NN? I would recommend using the profiler to identify the slow parts first, before looking at memory allocation. – IainDunning
I have used the profiler, and NN = length(xx) is not a bottleneck. xx is ::Vector{Float64}, so NN is simply an integer, equal to 5000 in this experiment. – Adham
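For reference, the profiling mentioned above was done roughly like this (a minimal sketch; dpm, xx, zz and the iteration counts are assumed to be set up as in the experiment):

# run the sampler under Julia's built-in sampling profiler
@profile collapsed_gibbs_sampler!(dpm, xx, zz, n_burnins, n_lags, n_samples, n_internals)
Profile.print()    # prints a call tree showing where the time was spent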