Localization processes for functional data analysis. Software companion for the paper “Localization processes for functional data analysis” by Elías, A., Jiménez, R., and Yukich, J. (2020)
Software companion for the paper “Localization processes for functional
data analysis” by Elías, Antonio, Jiménez, Raúl, and Yukich, Joe, (2020)
\
processes and localization distances and their application to
classification and outlier detection problems.
#install the package
devtools::install_github("aefdz/localFDA")
## v checking for file 'C:\Users\anton\AppData\Local\Temp\Rtmp4617Sq\remotes2e00503a197c\aefdz-localFDA-25b0d40/DESCRIPTION' (425ms)
## - preparing 'localFDA':
## checking DESCRIPTION meta-information ... checking DESCRIPTION meta-information ... v checking DESCRIPTION meta-information
## - checking for LF line-endings in source and make files and shell scripts
## - checking for empty or unneeded directories
## - looking to see if a 'data/datalist' file should be added
## - building 'localFDA_0.0.0.9000.tar.gz'
##
##
#load the package
library(localFDA)
Load the example data and plot it.
X <- exampleData
n <- ncol(X)
p <- nrow(X)
t <- as.numeric(rownames(X))
#plot the data set
df_functions <- data.frame(ids = rep(colnames(X), each = p),
y = c(X),
x = rep(t, n)
)
functions_plot <- ggplot(df_functions) +
geom_line(aes(x = x, y = y, group = ids, color = ids),
color = "black", alpha = 0.25) +
xlab("t") + theme(legend.position = "none")
functions_plot
Empirical version of Equation (1) of the paper. For one focal,
focal <- "1"
localizarionProcesses_focal <- localizationProcesses(X, focal)$lc
Plot localization processes of order 1, 50, 100 and 200:
df_lc <- data.frame(k = rep(colnames(localizarionProcesses_focal), each = p),
y = c(localizarionProcesses_focal),
x = rep(t, n-1)
)
lc_plots <- list()
ks <- c(1, 50, 100, 200)
for(i in 1:4){
lc_plots[[i]] <- functions_plot +
geom_line(data = filter(df_lc, k == paste0("k=", ks[i])),
aes(x = x, y = y, group = k),
color = "blue", size = 1) +
geom_line(data = filter(df_functions, ids == focal),
aes(x = x, y = y, group = ids),
color = "red", linetype = "dashed", size = 1)+
ggtitle(paste("k = ", ks[i]))
}
wrap_plots(lc_plots)
Equation (18) of the paper. For one focal,
localizationDistances_focal <- localizationDistances(X, focal)
head(localizationDistances_focal)
## k=1 k=2 k=3 k=4 k=5 k=6
## 0.0005082926 0.0011346495 0.0017636690 0.0023955745 0.0030095117 0.0035089220
Plot the localization distances:
df_ld <- data.frame(k = names(localizationDistances_focal),
y = localizationDistances_focal,
x = 1:c(n-1)
)
ldistances_plot <- ggplot(df_ld, aes(x = x, y = y)) +
geom_point() +
ggtitle("Localization distances for one focal") +
xlab("kth") + ylab("L")
ldistances_plot
localizationStatistics_full <- localizationStatistics(X, robustify = TRUE)
#See the mean and sd estimations for k = 1, 100, 200, 400, 600
localizationStatistics_full$trim_mean[c(1, 100, 200, 400, 600)]
## k=1 k=100 k=200 k=400 k=600
## 0.001083517 0.098465426 0.184940365 0.350528860 0.526580274
localizationStatistics_full$trim_sd[c(1, 100, 200, 400, 600)]
## k=1 k=100 k=200 k=400 k=600
## 0.0005326429 0.0329170846 0.0490732397 0.0686018224 0.0806314699
X <- classificationData
ids_training <- sample(colnames(X), 90)
ids_testing <- setdiff(colnames(X), ids_training)
trainingSample <- X[,ids_training]
testSample <- X[,ids_testing]; colnames(testSample) <- NULL #blind
classNames <- c("G1", "G2")
classification_results <- localizationClassifier(trainingSample, testSample, classNames, k_opt = 3)
checking <- data.frame(real_classs = ids_testing,
predicted_class =classification_results$test$predicted_class)
checking
## real_classs predicted_class
## 1 12_G1 G1
## 2 14_G1 G1
## 3 21_G1 G1
## 4 44_G1 G1
## 5 54_G2 G2
## 6 56_G2 G2
## 7 72_G2 G2
## 8 81_G2 G2
## 9 94_G2 G2
## 10 100_G2 G2
X <- outlierData
outliers <- outlierLocalizationDistance(X, localrule = 0.95, whiskerrule = 1.5)
outliers$outliers_ld_rule
## [1] "1_magnitude" "1_shape" "2_magnitude" "2_shape"
Plot results,
df_functions <- data.frame(ids = rep(colnames(X), each = nrow(X)),
y = c(X),
x = rep(seq(from = 0, to = 1, length.out = nrow(X)), ncol(X)))
functions_plot <- ggplot(df_functions) +
geom_line(aes(x = x, y = y, group = ids),
color = "black") +
xlab("t") +
theme(legend.position = "bottom")+
geom_line(data = df_functions[df_functions$ids %in% outliers$outliers_ld_rule,], aes(x = x, y = y, group = ids, color = ids), size = 1) +
guides(color = guide_legend(title="Detected outliers"))
functions_plot
Elías, Antonio, Jiménez, Raúl and Yukich, Joe (2020). Localization
processes for functional data analysis
[https://arxiv.org/abs/2007.16059]https://arxiv.org/abs/2007.16059.