For this analysis, we start by constructing a standard curve from laboratory data and then use it, together with residence time distribution (RTD) theory, to determine the residence times and the equilibrium concentration based on annual dust deposition.
The standard curve is based on a laboratory bomb-style experiment in which trace element concentrations were recorded following an initial large deposition event. Both particulate and dissolved forms were measured.
The model used was inspired by enzyme kinetics and allows a residual concentration to remain as $t \to \infty$ (most functional responses assume that $C$ returns to 0 or to initial conditions).*
$$C(t) = Y - \frac{At}{t+K}$$Here $Y$ is the initial concentration after the depositional event, $A$ is a scale factor that sets the residual concentration as $t\to\infty$ (i.e. $C \to Y - A$), and $K$ controls the rate of decay.
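For intuition, here is a quick sketch of the curve's shape (the parameter values are illustrative only, not fitted):
## Illustrative (made-up) parameters: 80% of the signal decays, 20% remains.
Y.demo = 100; A.demo = 80; K.demo = 100
t.demo = seq(0, 2000, 10) # minutes
plot(t.demo, Y.demo - A.demo * t.demo/(t.demo + K.demo), type='l', xlab='Time (min)', ylab='C(t) (% of initial)')
abline(h = Y.demo - A.demo, lty=2) ## residual concentration as t -> infinity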
To fit the model, we used an iterative approach with the sum of squared residuals (SSR) as the cost function. On each iteration, a set of parameters ($Y$, $A$, and $K$) was fit to a randomly perturbed copy of the data (a parametric bootstrap; see the model() function below), and the quality of the resulting fit was scored via the SSR.
Once the fits were made, the parameters were saved and the resulting $R^2$ values were computed:
$$R^2 = 1 - \frac{SSR}{SST}$$where $SSR = \sum_i \left(C(t_i) - x_i\right)^2$ and $SST = \sum_i \left(x_i - \bar{x}\right)^2$.
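In code this is a one-line helper (a minimal sketch; the same computation appears inline when the fits are post-processed below):
## R^2 from a fit's SSR and the raw observations x.
r.squared = function(ssr, x) 1 - ssr / sum((x - mean(x))^2)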
In this section, the original data are scaled for use in the fitting function (to normalize the parameters used inside the model() function). For convenience, concentrations are converted to a percentage of the maximum (initial) value.
*Other functional forms were tested, including exponential and power laws, but they resulted in poor fits. Additionally, this model keeps just three fit parameters ($Y$, $A$, and $K$), the minimum number of degrees of freedom required to allow a residual concentration (i.e. residual, slope, and intercept).
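For reference, such a comparison can be run with the same machinery (a sketch only; fit.exp and its starting values are hypothetical, not the original test):
## Sketch: SSR cost for an exponential alternative, C(t) = C0*exp(-k*t) + B.
fit.exp = function(p, t, d) {
dif = p[1] * exp(-p[2] * t) + p[3] - d
sum(dif^2)
}
## e.g. nlm(fit.exp, c(100, 0.01, 10), Time, data.s[,1])$minimum, compared
## against the hyperbolic fit's SSR, ranks the two forms.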
## Load the data from csv
data = read.csv("~/Dropbox/Documents/Alina_Tank Concentrations V4.csv")
## Clean up the data structure and move time to own variable.
data = data[-1,] # drop the first row
Time = data$Time*60-60 # minutes from first sampling point
data = data[,-1] * 1e3 # ug/L --> ug/m3
print('Raw Data')
head(data)
## Scaled 0->100% data for use in fitting the model.
data.s = data
for (i in 1:ncol(data)) {
data.s[,i] = data[,i]/max(data[,i]) * 100
}
print('Scaled data')
head(data.s)
## Load dust flux data (per minute)
Dust = read.csv('~/Dropbox/Documents/Alina_Dust Fluxes Combined.csv') #ug m-2 min-1
Dust = Dust[,-1] # drop the first column
str(Dust)
This code block defines the quality of the fit (the cost) that is then optimized. The first function, fit(), calculates the sum-of-squares error between the second function, fitted(), and the data, d.
The current functional form is:
$$f(t) = p_1 - \frac{p_2 \, t}{t + p_3}$$with $p = (Y, A, K)$.
## Helper Functions
fit = function(p, t, d) {
dif = fitted(p[1], p[2], p[3], t) - d
sum(dif^2)
}
fitted = function(Y, A, K, t) { ## note: masks stats::fitted()
Y - A*t/(t+K)
}
fitted.p = function(p, t) {
p[1] - p[2]*t/(t+p[3])
}
The model() function generates a fit to the data, using fit() to score the quality. It is passed the data and an index j, the column within data.s to fit.
This function applies a normally distributed relative error adjustment to the original data, since the result is used to bootstrap the variance (i.e. we allow the concentrations to vary with a standard deviation of 10%). From an initial guess, nlm() then iteratively adjusts the fit using a Newton-type nonlinear minimization. For this model, $Y$, $A$, and $K$ are all adjusted.
model = function(j, data.s) {
d = data.s[,j]
dd = rnorm(length(d), d, d*0.1) ## perturb observations with 10% relative noise (parametric bootstrap)
## Initial guess
A = 80
K = 100
Y = max(dd)
## Fit model (Newton-type nonlinear minimization via nlm)
res = nlm(fit, c(Y, A, K), Time, dd, stepmax=10)
# New params
Y = res$estimate[1]
A = res$estimate[2]
K = res$estimate[3]
return(data.frame(j,colnames(data.s)[j], Y, A, K, res$minimum))
}
The model is then run n times for each element, and the results are accumulated in the (well-named) variable temp.
temp = model(1, data.s) ## seed the results data.frame (element 1 gets one extra fit)
n = 100
for (j in 1:ncol(data.s)) {
for (i in 1:n) {
temp = rbind(temp, model(j, data.s))
}
}
## Rescale fitted Y and A from percent back to original concentration units
for (i in 1:ncol(data.s)) {
l = temp$j==i
temp$A[l] = temp$A[l]/100.0 * max(data[,i])
temp$Y[l] = temp$Y[l]/100.0 * max(data[,i])
}
str(temp)
# Optional output plotting the Fit lines with the data.
pdf('~/temp/Alina_Fits.pdf')
par(mfrow=c(2,2))
for (j in 1:ncol(data)) {
l = (temp$j == j)
plot(Time, data[,j], xlab='Time (min)', ylab=colnames(data)[j])
for (i in c(1:nrow(temp))) {
if (l[i]) {
lines(Time, fitted(temp$Y[i], temp$A[i], temp$K[i], Time), col='#FF000003')
}
}
}
dev.off()
temp$R2 = 0
## Add Dust fluxes to models
temp$MJMin = 0 # ug m-2 min-1
temp$MJAvg = 0
temp$MJMax = 0
temp$SJMin = 0
temp$SJAvg = 0
temp$SJMax = 0
for (i in 1:nrow(temp)) {
k = temp$j[i]
temp$R2[i] = 1-temp$res.minimum[i]/sum((data.s[,k]-mean(data.s[,k]))^2) ## SSR was computed on the scaled data, so SST uses data.s too
## Moroccan
temp$MJMin[i] = as.numeric( Dust[3, k] - Dust[4, k] )
temp$MJAvg[i] = as.numeric( Dust[3, k] )
temp$MJMax[i] = as.numeric( Dust[3, k] + Dust[4, k] )
## SW
temp$SJMin[i] = as.numeric( Dust[1, k] - Dust[2, k] )
temp$SJAvg[i] = as.numeric( Dust[1, k] )
temp$SJMax[i] = as.numeric( Dust[1, k] + Dust[2, k] )
}
str(temp)
#write.csv(ans, '~/Desktop/R2.csv')
Based on RTD theory, the residence time distribution $E(t)$, the proportion of particles with a particular residence time, is given by:
$$E(t) = \frac{C(t)}{\int_0^\infty C(t)\,dt}$$
For our discrete data then,
$$E(t) = \frac{C(t)}{\sum C(t)\,\Delta t}$$
From this residence time distribution, the mean residence time is simply the area under the curve of $t \cdot E(t)$ (the derivation is a bit involved, but a simple explanation is available upon request):
$$\tau = \int_0^\infty t\cdot E(t)\,dt \approx \sum \left(t \cdot E(t)\,\Delta t\right)$$Because the hyperbolic tail of $C(t)$ decays slowly, these sums are evaluated over a finite window (0 to 2400 minutes in the code below).
## Calculate E(t) and t*E(t) for each model fit
t = seq(0, 24*100, 1) ## finite evaluation window: 0-2400 min at 1-min steps
temp$ETau = 0
for (i in 1:nrow(temp)) {
s = sum((temp$A[i] - temp$A[i] * t/(t+temp$K[i]))) ## area under the decaying component, A*K/(t+K)
# E(t) is:
e = (temp$A[i] - temp$A[i] * t/(t+temp$K[i])) / s
# save the mean residence time in temp
temp$ETau[i] = sum(t * e)
}
## Let's calculate the resulting average residence time for each element:
m = c()
s = c()
for (i in 1:ncol(data)) {
m = c(m, mean(temp$ETau[temp$j == i]))
s = c(s, sd(temp$ETau[temp$j == i]))
}
data.frame(n=c(1:ncol(data)), m/60, s/60) ## mean and sd of tau, converted from minutes to hours
Next, calculate the SML (surface microlayer) concentration profile over time, and use the results from above to calculate the equilibrium concentration (Eq) and residence time (Tau).
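A minimal sketch of the equilibrium step, assuming a simple steady-state box model (our assumption here, not necessarily the exact calculation used): at equilibrium the dust flux into the SML balances first-order removal, so $C_{eq} = F\tau/h$.
## Assumed steady-state box model: C_eq = F * tau / h (ug m-3), where F is the
## dust flux (ug m-2 min-1), tau the residence time (min), and h the SML thickness (m).
eq.conc = function(flux, tau, h = 47e-6) flux * tau / h
## e.g. a column like temp$MEQ.avg could be filled as eq.conc(temp$MJAvg, temp$ETau)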
## initialize setup
library(compiler)
enableJIT(3)
w = 47e-6 ## 47 um (SML thickness)
d = seq(0, 25, 0.1) ## depth domain in mm (0.1 mm = 100 um resolution)
dz = d[2] - d[1]
k2 = 1e-3/60 # 1e-3 per minute, converted to per second
rr = 60*100 # number of minutes to run the model for
#########################################
##~~~ Calculate Diffusion Profile ~~~~ ##
#########################################
dprofile = data.frame(matrix(0, nrow=length(d), ncol=10))
dhistory = data.frame(matrix(0, ncol=10, nrow=rr))
f = 2 ## scaling factor applied jointly to D (x f) and dt (/ f)
end = length(d)
dt = 1.5/f # s
dust = Dust[3,1:10] * 60 * 24 * 30 * 12 # dust flux per year (ug m-2 min-1 x minutes in a 360-day year)
dprofile[which(d < 5.6),] = t(dust) / 5.6e-3 ## spread the annual deposition over the top 5.6 mm (ug m-2 -> ug m-3)
D = f*as.numeric(DiffusionCoefficients[1:10] * 1e6) ## mm^2/s
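## Added check (not in the original workflow): the explicit scheme below is only
## stable when D*dt/dz^2 <= 0.5, so it is worth verifying before entering the loops.
stopifnot(all(D * dt / dz^2 <= 0.5))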
for (i in 1:60) { ## each loop is a minute (initial hour at the finer time step)
for (j in 1:(60/dt)) { ## sub-steps of dt seconds within each minute
for (k in 1:ncol(dprofile)) { ## each element (column)
d2C = dprofile[c(2:end,end), k] + dprofile[c(1,1:(end-1)), k] - 2 * dprofile[1:end,k] ## second difference, no-flux boundaries
d2C = as.numeric(d2C)
dprofile[,k] = dprofile[,k] + D[k] * (d2C/dz^2) * dt - dprofile[,k] * k2 * dt ## diffusion + first-order loss
}
}
}
dt = 2 * dt ## coarsen the time step for the long run
ll = which(d < .047) ## grid cells within the 47-um SML
for (i in 1:rr) { ## each loop is a minute
for (j in 1:(60/dt)) { ## sub-steps of dt seconds within each minute
for (k in 1:ncol(dprofile)) { ## each element (column)
d2C = dprofile[c(2:end,end), k] + dprofile[c(1,1:(end-1)), k] - 2 * dprofile[1:end,k]
d2C = as.numeric(d2C)
dprofile[,k] = dprofile[,k] + D[k] * (d2C/dz^2) * dt - dprofile[,k] * k2 * dt
}
}
dhistory[i,] = sapply(dprofile[ll,], mean) ## record the mean SML concentration each minute
}
#pdf('~/Desktop/RTD-k-3.pdf')
par(mfrow=c(2,2))
for(i in 1:10) {
plot(c(1:(nrow(dhistory)))*dhistory[,i]/sum(dhistory[,i]),
type='l', main=colnames(data)[i],
xlab='Time (min)', ylab='t*E(t)')
tt = data[,i]/1e3
tt = (tt-min(tt)) / (1 - min(tt)/max(tt))
dT = Time - c(60, Time[1:(length(Time)-1)]) ## sampling intervals (min); dT avoids masking T
points(Time, Time * tt/sum(tt * dT), col='#ff000050')
text(5000, .25,
paste('Data Tau =', format(sum(Time * tt/sum(tt * dT) * dT)/60, digits=3), 'hr'), cex=0.6, col='#ff000080')
text(5000,0.27,
paste('Diffusion Tau =',
format(sum(c(61:(nrow(dhistory)+60))*dhistory[,i]/sum(dhistory[,i]))/60, digits=3),
'hr'), cex=0.6)
#text(5000,0.29, paste('Fit Tau =', format(m[i]/60,digits=3), 'hr'), cex=0.6, col='blue')
l = temp$j == i
tim = c(1:6e3)
A = mean(temp$A[l])
K = mean(temp$K[l])
Y = mean(temp$Y[l])
s = sum((A - A * tim/(tim+K)))
# E(t) is:
e = (A - A * tim/(tim+K)) / s - 3.5e-5 ## note the small constant offset (-3.5e-5)
lines(tim, e*tim, col='red')
text(5000,0.29, paste('Fit Tau =', format(sum(e*tim)/60,digits=3), 'hr'), cex=0.6, col='red')
}
#dev.off()
save(dhistory, file='~/Dropbox/Documents/Alina_dhistory2.rdata')
#load('~/Dropbox/Documents/Alina_dhistory.rdata')
#pdf('~/Desktop/Alina_Data versus diffusion model (rel and abs).pdf')
par(mfrow=c(2,2))
for(n in 1:10) {
tt = data.s[,n]/100
tt = (tt-min(tt)) / (1 - min(tt)/max(tt))
plot(Time, tt, xlim=c(0,1500), ylim=c(0,1),
main=colnames(data)[n], xlab="Time (min)", ylab="Rel Conc.")
lines(c(1:nrow(dhistory))-60, dhistory[,n]/dhistory[60,n], pch=2, col='#FF0000')
#legend(1000, 1, c("k = 1e-2", "k = 1e-3"), col=c('#FF0000','#FF00F0'), lwd=3, cex=0.6)
tt = data[,n]/1e3
tt = (tt-min(tt)) / (1 - min(tt)/max(tt))
plot(Time, tt, xlim=c(0,1500),ylim=c(0,max(tt)*1.25),
main=colnames(data)[n], xlab="Time (min)", ylab="Conc. (ug/L)")
lines(c(1:nrow(dhistory))-60, dhistory[,n], pch=2, col='#FF0000')
#legend(1000, max(tt)*1.1, c("k = 1e-2", "k = 1e-3"), col=c('#FF0000','#FF00F0'), lwd=3, cex=0.6)
}
#dev.off()
The particulate model proceeds in steps: load the particle size spectrum, partition the dust flux across the size classes, then evolve each class with diffusion and settling.
## load and prep information about particles
particles = read.csv('~/Dropbox/Documents/Alina_Tank Model Size Spectrum.csv')
particles$k = k2 * particles$SA.m2. / sum(particles$SA.m2.) ## partition the loss rate by relative surface area
particles$m = particles$D.µm.^3 * 4*pi/3 * 1e-18 ## particle volume ~ D^3 (normalized below, so constants cancel)
particles$m = particles$m/sum(particles$m) ## mass fraction per size class
particles$v.ms = particles$v.ums * 1e-6 ## settling velocity: um/s -> m/s
profile = data.frame(matrix(0,ncol = nrow(particles), nrow=length(d)))## Current vertical profiles
history = data.frame(matrix(0, ncol=nrow(particles), nrow=rr))## the sml concentrations of each
## Prep the workspace for the particle analysis
m = 1 # which TE
end = length(d)
dt = 1.5/f # s
dust = Dust[3,m+20] * 60 * 24 * 30 * 12 * particles$m ## annual dust flux, partitioned across size classes
profile[1,] = dust / 5.6e-3
D = f*as.numeric(DiffusionCoefficients[m] * 1e6) ## mm^2/s
e = rep(1, ncol(profile)) # enrichment factor
#########################################
##~~~ Calculate Diffusion Profile ~~~~ ##
#########################################
for (i in 1:rr) { ## each loop is a minute
#Cd = dhistory[nrow(history), m]
for (j in 1:(60/dt)) { ## sub-steps of dt seconds within each minute
for (k in 1:ncol(profile)) { ## each particle size class
d2C = profile[c(2:end,end),k] + profile[c(1,1:(end-1)),k] - 2 * profile[,k]
## Diffusion
profile[,k] = profile[,k] + D * (d2C/dz^2) * dt
## Scavenging (currently disabled)
#e[k] = e[k] + (Cd * k2 * particles$k[k] * dt) / profile[1,k]
## Settling (loss rate = settling velocity / layer thickness)
profile[,k] = profile[,k] * (1-particles$v.ms[k]/w * dt)
profile[profile[,k] < 0, k] = 0 ## clamp negative concentrations
}
}
history[i,] = profile[1,] * e ## record surface concentrations (x enrichment factor e)
}
#save(history, file=paste0('~/Dropbox/Documents/Alina_history-',colnames(data)[n] , '.rdata'))
load('~/Dropbox/Documents/Alina_history-Al-l.rdata')
pdf(paste0('./Particulate Model Results-', colnames(data)[m], '.pdf'))
par(mfrow=c(2,2))
tt = data.s[,m+10]/100
tt = (tt-min(tt)) / (1 - min(tt)/max(tt))
plot(Time, tt, xlim=c(0,1500), ylim=c(0,1),
main=colnames(data)[m], xlab="Time (min)", ylab="Rel Conc.")
lines(c(1:nrow(history)), apply(history, 1, sum)/apply(history, 1, sum)[1], pch=2, col='#FF0000')
for (i in 1:ncol(history)) {
lines(c(1:nrow(history)), history[,i]/apply(history, 1, sum)[1], pch=2, col='#FF000020')
}
tt = data[,m+10]/1e3 ## use m (the current element), not the stale loop variable n
tt = (tt-min(tt)) / (1 - min(tt)/max(tt))
plot(Time, tt, xlim=c(0,1500),ylim=c(0,max(tt)*1.25),
main=colnames(data)[m], xlab="Time (min)", ylab="Conc. (ug/L)")
lines(c(1:nrow(history)), apply(history, 1, sum), pch=2, col='#FF0000')
for (i in 1:ncol(history)) {
lines(c(1:nrow(history)), history[,i], pch=2, col='#FF000020')
}
dev.off()
Finally, calculate the observed diffusion coefficient from the sulfate data, and generate animation frames of the modeled profiles.
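One way to back out an observed diffusion coefficient is a grid search over candidate diffusivities (a sketch of the general idea only; run.model() is a hypothetical wrapper that would re-run the diffusion scheme above with a given diffusivity and return the modeled SML sulfate series):
## Hypothetical sketch: choose the diffusivity minimizing the misfit to observed SML sulfate.
D.grid = 10^seq(-4, -2, 0.1) ## candidate diffusivities (mm^2/s)
sse = sapply(D.grid, function(D.try) {
pred = run.model(D.try) ## hypothetical wrapper around the scheme above
sum((pred - modeldata$sulfate1)^2)
})
D.obs = D.grid[which.min(sse)]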
# interpolate sulfate data onto modeldata$Time
modeldata$sulfate1 = 0
modeldata$sulfate2 = 0
sulfate_d1 = .05
sulfate_d2 = 300 #30cm
for(i in 1:nrow(modeldata)) {
i1 = which.min(abs(sulf$time-modeldata$Time[i]))
if ( sulf$time[i1] < modeldata$Time[i]) {
i2 = i1 + 1
} else {
if (i1 == 1) {
i2 = 2
} else {
i2 = i1 -1
}
}
m1 = (sulf$sml[i2] - sulf$sml[i1]) / (sulf$time[i2] - sulf$time[i1]) ## slope of the sml series
m2 = (sulf$uwc[i2] - sulf$uwc[i1]) / (sulf$time[i2] - sulf$time[i1]) ## slope of the uwc series
modeldata$sulfate1[i] = m1 * (modeldata$Time[i] - sulf$time[i1]) + sulf$sml[i1] ## linear interpolation
modeldata$sulfate2[i] = m2 * (modeldata$Time[i] - sulf$time[i1]) + sulf$uwc[i1]
}
summary(modeldata$sulfate1)
i=380
plot(c(2:ncol(modeldata)*dz), 0.90 * (modeldata[i,2:ncol(modeldata)] - 25) + 25,
xlab="Depth (mm)", ylab="Concentration",
pch=4, cex=0.5,
ylim=c(0,25))
points(sulfate_d1, modeldata$sulfate1[i], col='blue')
points(sulfate_d2, modeldata$sulfate2[i], col='blue')
text(100, 0.025, paste(format(modeldata$Time[i]/60, digits=3), 'minutes'))
for (i in 1:nrow(modeldata)) {
# creating a name for each plot file with leading zeros
if (i < 10) {name = paste('0000',i,'plot.png',sep='')}
if (i < 100 && i >= 10) {name = paste('000',i,'plot.png', sep='')}
if (i >= 100) {name = paste('00', i,'plot.png', sep='')}
if (i >= 1000) {name = paste('0', i,'plot.png', sep='')}
#saves the plot as a .png file in the working directory
png(paste("./images/", name, sep=''))
plot(c(2:ncol(modeldata)*dz), modeldata[i,2:ncol(modeldata)],
xlab="Depth (mm)", ylab="Concentration",
pch=4, cex=0.5,
ylim=c(10,25))
points(sulfate_d1, modeldata$sulfate1[i], col='blue')
points(sulfate_d2, modeldata$sulfate2[i], col='blue')
text(400, 15, paste(format(modeldata$Time[i]/60, digits=3), 'minutes'), cex=0.7)
dev.off()
}
for (i in 1:ncol(data)) {
write.csv(temp[temp$j == i,], paste0('~/temp/Model_', colnames(data)[i], '.csv'))
}
summary = data.frame(Element=0, Y=0, Ys=0, A=0, As=0, K=0, Ks=0, Eq=0, Eqs=0, Tau=0, Taus=0) ## placeholder seed row (dropped below)
for (i in 1:ncol(data)) {
l = (temp$j == i)
temps = temp[l,]
l = which(!is.na(temps$MEQ.avg))
summary = rbind(summary, c(colnames(data[i]), mean(temps$Y[l]), sd(temps$Y[l]),
mean(temps$A[l]), sd(temps$A[l]),
mean(temps$K[l]), sd(temps$K[l]),
mean(temps$MEQ.avg[l]), sd(temps$MEQ.avg[l]),
mean(temps$MTau.avg[l]), sd(temps$MTau.avg[l]))
)
}
summary = summary[-1,] ## drop the placeholder seed row
write.csv(summary, '~/temp/Hyperbolic Summary.csv')
head(summary)
par(mfrow=c(3,3))
for (i in 1:ncol(data)) {
plot(Time, data[,i], ylim=c(0,max(data[,i])), main=colnames(data)[i])
}