
Introduction

Read
library(xlsx)
data = read.xlsx('Classify_risk_dataset.xlsx', sheetIndex = 1, header = TRUE,
stringsAsFactors = TRUE)

cleveland_data <- read.csv('cleveland_data.txt', header = T, sep = ",",
dec = ".", stringsAsFactors = F)
Apply

apply(data, 1, sum) # row sums of a data frame

unlist(lapply(1:2, function(x){sum(M[,x])})) # column sums of the first 2 columns of M (lapply returns a list)

sapply(1:2, function(x){sum(M[,x])}) # same, but sapply simplifies the result to a vector

# tapply(variable to summarise, grouping variable used to split the data,
# function to apply to each resulting group)

 tapply(medical.data$age, medical.data$treatment, mean)
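A minimal runnable sketch of tapply (medical.data here is a made-up example frame, not one of the course datasets):

medical.data <- data.frame(age = c(34, 51, 47, 62, 58, 40),
treatment = c('A', 'B', 'A', 'B', 'A', 'B'))
tapply(medical.data$age, medical.data$treatment, mean) # mean age per treatment group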

Others
cbind , rbind

df = data.frame(array1, array2) # similar to cbind

head(df), tail(df), colnames(df), nrow(df), str(df)

df$col1 EQUAL TO df[,1] EQUAL TO df[,'col1']

df['vol'] NOT EQUAL TO df[,'vol'] # the first returns a data frame, the second a vector

> df['vol']
  vol
1   5
2   6
3   7
4   8

> df[,'vol']
[1] 5 6 7 8

mydata[mydata$e=='red', c('d', 'e')] #or
mydata[mydata['e']=='red', 'd']

mydata[which(mydata$e== 'red'& mydata$d <=1),]

mydata[,-c(1, 2)]

Soil=Soil[order(-Soil$pH),] # order by decreasing

> Soil[nrow(Soil)+1,]<-apply(Soil,2,sum, na.rm = TRUE)

> rownames(Soil)[nrow(Soil)]<- ' Total'
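A hypothetical Soil data frame that makes the three lines above runnable (the real values are not given in these notes):

Soil <- data.frame(pH = c(5.2, 6.8, 4.9), H2O = c(18, 25, 22)) # hypothetical values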

Data Preparation
Read file
cleveland_data <- read.csv('cleveland_data.txt',header = T,sep =
",",dec = ".",stringsAsFactors = F)

Change colnames
colnames(data_switzerland)[colnames(data_switzerland)=="AGE"] <- "age"
#or colnames(switzerland_data)[4]<-'age'
ID column
data$id <- 1:nrow(data)

Functions
mean(data$age)
sd(data$age)
min(data$age)
max(data$age)
NA treatment
data[data=='?']<-NA
Cols to factors
cols_to_factors <- c('origin','sex','cp','fbs','slope','num')
for (i in cols_to_factors){
data[,i] <- as.factor(data[,i])
}

Data cleaning
Remove observations with 3 or more missing values
aux <- apply(data,1,function(x)sum(is.na(x)))
length(aux[aux<3]) #581 observations
data <- data[which(aux<3),]
Replace NA with regression results
model<-lm(PO4 ~ oPO4, data = algae)
AND
algae[28, "PO4"] <- model[["coefficients"]][1] +
model[["coefficients"]][2] * algae[28, "oPO4"]
OR (if missing values)
fillPO4 <- function(oP) {
if (is.na(oP))
return(NA)
else
(return(model[["coefficients"]][1]+model[["coefficients"]][2]*oP))}
algae[is.na(algae$PO4), "PO4"] <- sapply(algae[is.na(algae$PO4),
"oPO4"], fillPO4)
Replace NA with mode
for (i in c('fbs', 'slope', 'thal')){
data[is.na(data[,i]),i] <-
as.numeric(names(sort(-table(data[,i])))[1]) # mode = most frequent observed (non-missing) value
}
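The same idea wrapped in a small helper may read more clearly (a sketch; statistical_mode is a hypothetical helper, not a base R function):

statistical_mode <- function(x) as.numeric(names(which.max(table(x)))) # most frequent observed value
for (i in c('fbs', 'slope', 'thal')){
data[is.na(data[,i]), i] <- statistical_mode(data[,i])
}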
Replace NA with mean/median
algae[is.na(algae$Chla), "Chla"] <- median(algae$Chla, na.rm = T)
Replace NA with kNN
library(DMwR2) # knnImputation() comes from the DMwR/DMwR2 package
algae <- knnImputation(algae, k = 10, meth = "median")
Binning
library(infotheo)
discretize(algae$Cl, disc="equalfreq", nbins=sqrt(length(algae$Cl)))
#Create new attribute with bin values
algae$bin_2<-as.numeric(unlist(discretize(algae$Cl, disc="equalfreq",
nbins=sqrt(length(algae$Cl)))))

#Associate the mean value to each bin
for (k in 1:round(sqrt(NROW(algae$Cl)))){
algae$Cl[algae$bin_2 == k]<-mean(algae[algae$bin_2==k, "Cl"])}
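An equivalent one-liner with ave(), assuming bin_2 already holds the bin index of each row:

algae$Cl <- ave(algae$Cl, algae$bin_2, FUN = mean) # replace each value by the mean of its bin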
Eliminate outliers outside 3 SD
data <- data[-which(data$chol > (mean(data$chol)+3*sd(data$chol)) |
data$chol < (mean(data$chol)-3*sd(data$chol))),]
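Note that -which() drops every row when no outlier is found (which() then returns an empty vector); a safer variant of the same 3-SD rule:

z <- (data$chol - mean(data$chol, na.rm = TRUE)) / sd(data$chol, na.rm = TRUE)
data <- data[which(abs(z) <= 3), ] # which() also leaves out rows where chol is NA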

Data transformation
Data with threshold
data$var_2 <- 0
data[which(data$thalach>150),'var_2'] <- 1
Data with threshold (text labels)
Soil$aux_2[Soil$H2O > 20] <- "higher than 20"
Soil$aux_2[Soil$H2O <= 20] <- "lower than or equal to 20"

Z-score normalization
numeric_attrib <- c('age','trestbps')

z_score_function <-
function(x) {(x - mean(x, na.rm=TRUE))/(sd(x,na.rm=TRUE))}

data[,numeric_attrib] <-
apply(data[,numeric_attrib],2, z_score_function)
Min-max normalization
new_max<-2
new_min<-0

doit <- function(x) {(x - min(x, na.rm=TRUE))/(max(x,na.rm=TRUE) -
min(x, na.rm=TRUE))*(new_max-new_min)+new_min}

apply(data[,numeric_attrib], 2, doit) # apply only to numeric columns
Decimal scaling
maxvect <- apply(abs(mtcars), 2, max)
kvector <- ceiling(log10(maxvect))
scalefactor <- 10^kvector
normed <- scale(mtcars, center = FALSE, scale = scalefactor) # same data used to build maxvect
Random Sampling
Mysample_without_replacement <- mtcars[sample(1:nrow(mtcars), 20,
replace=FALSE),] # without replacement the sample size cannot exceed nrow(mtcars)
Mysample_with_replacement <- mtcars[sample(1:nrow(mtcars), 50,
replace=TRUE),]
Stratified Sampling keeping each class distribution
p=0.7
data_sample <- data.frame()
for (i in levels(data$origin)){
data_sub <- data[data$origin==i,]
B = ceiling(nrow(data_sub)*p) #rounds up
data_sub_aux <- data_sub[sample(1:nrow(data_sub),B),]
data_sample <- rbind(data_sample,data_sub_aux)
}
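The same stratified sample can also be drawn with splitTools::partition, the helper used in the classification sections below; a sketch:

library(splitTools)
set.seed(1)
idx <- partition(data$origin, p = c(sample = 0.7, rest = 0.3)) # stratified by origin
data_sample <- data[idx$sample, ]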

Classification
holdout approach with One R
library(splitTools)
library(OneR) # OneR() classifier
partitions <- partition(Data$RISK, p = c(train = 0.7, test = 0.3))
str(partitions)
Train_sample <- Data[partitions$train,]
Test_sample <- Data[partitions$test,]

Model_OneR<-OneR(Train_sample[,-c(1)])
prediction<-predict(Model_OneR, Test_sample[,-c(1)])
table(prediction, Test_sample$RISK)
accuracy<-sum(prediction==Test_sample$RISK)/nrow(Test_sample)

Repeated holdout approach with One R
number_iterations<-10
accuracy<-vector(length=number_iterations)

for (j in 1:number_iterations){
set.seed(j)
partitions <- partition(Data$RISK, p = c(train = 0.7, test = 0.3))
Test <- Data[partitions$test,]
Train<- Data[partitions$train,]
Model_OneR<-OneR(Train[,-c(1)])
prediction<-predict(Model_OneR, Test[,-c(1)])
accuracy[j]<-sum(prediction== Test$RISK)/nrow(Test)
}

mean_accuracy<-mean(accuracy)
Cross validation with LR and good performance estimation
library(caret) # createFolds() comes from caret, not splitTools
num_folds<-10
set.seed(123)
folds <- createFolds(Data$RISK, k=num_folds)
#index the elements of the test dataset

predictions_summary<-data.frame()

for (k in 1:num_folds){
Model_Logistic<-glm(RISK~.-ID,data=Data[-folds[[k]],], family
= binomial('logit'))

prediction <- predict(Model_Logistic, Data[folds[[k]],], type="response")
## for a binomial glm the default predictions are log-odds (probabilities on
## the logit scale), so type="response" is needed to obtain probabilities

predictions_summary<-rbind(predictions_summary,
data.frame(ID=Data[folds[[k]],1], RISK=Data$RISK[folds[[k]]],
prediction))
}

summary(predictions_summary)

predictions_summary$RISK<-
as.numeric(as.character(predictions_summary$RISK))
#PresenceAbsence functions require both observed and predicted values to be numeric

#install.packages('PresenceAbsence')
library(PresenceAbsence)

confusion_matrix<-cmx(predictions_summary, 0.5)
confusion_matrix

performance_metrics<-presence.absence.accuracy(predictions_summary,
threshold = 0.5, find.auc = FALSE, st.dev = FALSE)
performance_metrics

#install.packages('pROC')
library(pROC)

roc_curve<-plot.roc(predictions_summary$RISK,
predictions_summary$prediction, direction="<")
roc_curve

auc_list<-auc(predictions_summary$RISK,
predictions_summary$prediction, direction="<")
auc_list
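PresenceAbsence can also suggest a classification threshold instead of the fixed 0.5 cut-off; a minimal sketch, assuming predictions_summary keeps the ID/observed/predicted layout used above:

optimal.thresholds(predictions_summary, opt.methods = "MaxSens+Spec") # threshold maximising sensitivity + specificity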

Cross validation with Naïve Bayes and performance estimation

#install.packages('e1071')
library (e1071)

predictions_summary<-data.frame()

for (k in 1:num_folds){

Model_NaiveBayes<-naiveBayes(RISK~.-ID,data=Data[-folds[[k]],])

prediction <- predict(Model_NaiveBayes, newdata=Data[folds[[k]],],
type='raw', threshold = 0.001)
predictions_summary<-rbind(predictions_summary,
data.frame(ID=Data[folds[[k]],1], RISK=Data$RISK[folds[[k]]],
prediction=prediction[,'1']))
}

predictions_summary$RISK<-
as.numeric(as.character(predictions_summary$RISK))

performance_metrics<-presence.absence.accuracy(predictions_summary,
threshold = 0.5, find.auc = FALSE, st.dev = FALSE)
performance_metrics

confusion_matrix<-cmx(predictions_summary)
confusion_matrix

roc_curve<-plot.roc(predictions_summary$RISK,
predictions_summary$prediction, legacy.axes= T, direction="<")
roc_curve

auc_list<-auc(predictions_summary$RISK,
predictions_summary$prediction, direction="<")
auc_list

kNN with cross validation and performance estimation
library(FNN)

Data$GENDER<-NULL # knn() only accepts numeric inputs, so categorical/binary variables such as GENDER must be dropped

predictions_summary<-data.frame()
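Data_norm is assumed to be a min-max normalised copy of Data with the ID and RISK columns left untouched; a sketch of how it could be built:

Data_norm <- Data
num_cols <- setdiff(names(Data)[sapply(Data, is.numeric)], c('ID', 'RISK'))
Data_norm[, num_cols] <- apply(Data[, num_cols], 2,
function(x) (x - min(x, na.rm=TRUE)) / (max(x, na.rm=TRUE) - min(x, na.rm=TRUE)))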

for (k in 1:num_folds){
Model_knn<-knn(Data_norm[-folds[[k]],-c(1,8)],
Data_norm[folds[[k]],-c(1,8)], cl=Data_norm[-folds[[k]],8], k = 2,
prob=TRUE)

prob_predictions=ifelse(Model_knn==1,attr(Model_knn, 'prob'),1-
attr(Model_knn, 'prob'))
# This is necessary because "if prob is true, the proportion of
# the votes for the winning class are returned as attribute prob."
# To compute the AUC we need the probability corresponding to
# the positive class

predictions_summary<-rbind(predictions_summary,
data.frame(ID=Data_norm[folds[[k]],1],
RISK=Data_norm$RISK[folds[[k]]], pred=prob_predictions))
}

predictions_summary$RISK<-
as.numeric(as.character(predictions_summary$RISK))

auc_list<-auc(predictions_summary$RISK, predictions_summary$pred,
direction="<")

auc_list
kNN with 1 parameter tuning
#create stratified train and test samples
library(splitTools)
library(caret)

partitions <- partition(Data_norm$RISK, p = c(train_validation = 0.8,
test = 0.2))

Test_sample <- data.frame()


Train_validation_sample <- data.frame()

Train_validation_sample <- Data_norm[partitions$train_validation,]


Test_sample <- Data_norm[partitions$test,]

num_folds<-10
max_num_neighbours<-40

set.seed(123)
folds <- createFolds(Train_validation_sample$RISK, k=num_folds)

performance_summary<-data.frame()

for (j in 1:max_num_neighbours){

predictions_summary<-data.frame()

for (k in 1:num_folds){

#8 is the column with the dependent variable
Model_knn<-knn(Train_validation_sample[-folds[[k]],-c(1,8)],
Train_validation_sample[folds[[k]],-c(1,8)],
Train_validation_sample[-folds[[k]],8], k = j, prob=TRUE)

prob_predictions=ifelse(Model_knn==1,attr(Model_knn,
'prob'),1-attr(Model_knn, 'prob'))
# if prob=TRUE, the proportion of votes for the winning class is
# returned as attribute prob; the AUC needs the probability of the
# positive class

predictions_summary<-rbind(predictions_summary,
data.frame(ID=Train_validation_sample[folds[[k]],1],
RISK=Train_validation_sample$RISK[folds[[k]]], pred=prob_predictions))
}

predictions_summary$RISK<-
as.numeric(as.character(predictions_summary$RISK))

performance_metrics<-presence.absence.accuracy(predictions_summary,
threshold = 0.5, find.auc = FALSE, st.dev = FALSE)

performance_summary<-rbind(performance_summary,
data.frame(num_neighbors=j, accuracy=performance_metrics$PCC))
}
num_neighbors<-
performance_summary[performance_summary$accuracy==max(performance_summary$accuracy),"num_neighbors"][1]
# [1] guards against ties for the best accuracy
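Before fixing num_neighbors it can help to plot the tuning results:

plot(performance_summary$num_neighbors, performance_summary$accuracy, type = 'b',
xlab = 'number of neighbours (k)', ylab = 'cross-validated accuracy')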

Final_Model_knn<-knn(Train_validation_sample[,-c(1,8)], Test_sample[,-
c(1,8)], Train_validation_sample[,8], num_neighbors, prob=TRUE)

prob_predictions=ifelse(Final_Model_knn==1,attr(Final_Model_knn,
'prob'),1-attr(Final_Model_knn, 'prob'))
# if prob=TRUE, the proportion of votes for the winning class is returned
# as attribute prob; the AUC needs the probability of the positive class

predictions_summary<-data.frame(ID=Test_sample[,1],
RISK=Test_sample$RISK, pred=prob_predictions)

predictions_summary$RISK<-
as.numeric(as.character(predictions_summary$RISK))

performance_metrics<-presence.absence.accuracy(predictions_summary,
threshold = 0.5, find.auc = FALSE, st.dev = FALSE)
performance_metrics

DT (Decision trees) with 1 parameter tuning


library(rpart)

num_folds<-10

set.seed(123)
folds <- createFolds(Data$RISK, k=num_folds)
#index the elements of the test dataset

#########################GINI INDEX############################

predictions_summary<-data.frame()

for (k in 1:num_folds){

Model_tree<-rpart(RISK~.-ID,data=Data[-folds[[k]],], parms =
list(split = 'gini'))
# parms - optional parameters for the splitting function.

prediction <- predict(Model_tree, newdata=Data[folds[[k]],])

predictions_summary<-rbind(predictions_summary,
data.frame(ID=Data[folds[[k]],1], RISK=Data$RISK[folds[[k]]],
prediction=prediction[,'1']))
}

#check constructed tree
library(rattle) # fancyRpartPlot() comes from the rattle package
fancyRpartPlot(Model_tree)

predictions_summary$RISK<-
as.numeric(as.character(predictions_summary$RISK))
performance_metrics<-presence.absence.accuracy(predictions_summary,
threshold = 0.5, find.auc = FALSE, st.dev = FALSE)
performance_metrics

roc_curve<-plot.roc(predictions_summary$RISK,
predictions_summary$prediction, legacy.axes= T, direction="<")
auc_list<-auc(predictions_summary$RISK,
predictions_summary$prediction)
auc_list

#######Parameter Tuning ############


#create stratified train and test samples
partitions <- partition(Data$RISK, p = c(train_validation = 0.8, test = 0.2))

Test_sample <- data.frame()


Train_validation_sample <- data.frame()

Train_validation_sample <- Data[partitions$train_validation,]


Test_sample <- Data[partitions$test,]

set.seed(123)
folds <- createFolds(Train_validation_sample$RISK, k=num_folds)

performance_summary<-data.frame()
criterion<-c('information','gini')
min_num_objects<-20

aux=0

for (l in criterion)
{
for (i in c(0.01,0))
{
for (j in 2:min_num_objects)
{
predictions_summary<-data.frame()
for (k in 1:num_folds)
{

Model_tree<-rpart(RISK~.-ID,data=Train_validation_sample[-
folds[[k]],], parms = list(split = l), minbucket = j, cp=i)
#minsplit: minimum number of observations in a node before attempting a split
#minbucket: minimum number of observations in a leaf
#cp: complexity parameter; penalizes complexity; cp=0 means no pruning is done

prediction <- predict(Model_tree,
newdata=Train_validation_sample[folds[[k]],])

predictions_summary<-rbind(predictions_summary,
data.frame(ID=Train_validation_sample[folds[[k]],1],
RISK=Train_validation_sample$RISK[folds[[k]]],
prediction=prediction[,'1']))

}
predictions_summary$RISK<-
as.numeric(as.character( predictions_summary$RISK))

performance_metrics<-
presence.absence.accuracy(predictions_summary, threshold = 0.5,
find.auc = FALSE, st.dev = FALSE)

performance_summary<-rbind(performance_summary,
data.frame(criterion=l, cp=i, min_objects=j,
accuracy=performance_metrics$PCC))

}
}
}

l<-
performance_summary[which(performance_summary$accuracy==max(performance_summary$accuracy))[1],"criterion"]
i<-
performance_summary[which(performance_summary$accuracy==max(performance_summary$accuracy))[1],"cp"]
j<-
performance_summary[which(performance_summary$accuracy==max(performance_summary$accuracy))[1],"min_objects"]

####

Final_model_tree<-rpart(RISK~.-ID,data=Train_validation_sample,
parms = list(split = l), minbucket = as.numeric(j), cp=i)

prediction <- predict(Final_model_tree, newdata=Test_sample)

predictions_summary<-data.frame(ID=Test_sample[,1],
RISK=Test_sample$RISK, prediction=prediction[,'1'])

predictions_summary$RISK<-
as.numeric(as.character(predictions_summary$RISK))

confusion_matrix<-cmx(predictions_summary, 0.5)
confusion_matrix

performance_metrics<-presence.absence.accuracy(predictions_summary,
threshold = 0.5, find.auc = FALSE, st.dev = FALSE)
performance_metrics

Clustering Plots
## PositionsDesc attribute as a factor
ds$PositionsDesc <- as.factor(ds$PositionsDesc)

### Plot the histogram of the attribute Freekicks


hist(ds$Freekicks, breaks = seq(min(ds$Freekicks), max(ds$Freekicks),
by=2), xlab = "Freekicks", ylab="count", col="lightsteelblue",
border="black")
### Alternatively, using ggplot:
#install.packages("ggplot2")
#library(ggplot2)
ggplot(data=ds, aes(x=Freekicks)) + geom_histogram(binwidth = 2,
color="blue4", fill="lightblue", alpha=0.9) + theme_bw ()

### Plot the frequency polygon of attribute Heading


library(ggplot2)
qplot(ds$Heading,col="dark red",xlab="Heading",ylab="count", geom =
"freqpoly", binwidth=2)
### Alternatively, using ggplot:
ggplot(data=ds, aes(x=Heading)) + geom_freqpoly(binwidth = 2,
color = "red4", size=1.5) + theme_classic()

## variable to keep the column number for the last numerical variable
last_var <- ncol(ds)-1
## parallel coordinates and scatter plot matrix
#install.packages("GGally")
#install.packages("ggrepel")
library(GGally)
library(ggrepel)

ggparcoord(ds, columns = 3:last_var, groupColumn=last_var + 1,
scale="globalminmax")
# scale is a character string denoting how to scale the variables in the
# parallel coordinate plot
# scale="globalminmax": no scaling is done; the range of the graph is
# defined by the global minimum and the global maximum
ggpairs(ds[,3:last_var]) # Make a matrix of plots

## Save the plot into a file. It may be easier to analyze the plot that way
p <- ggpairs(ds[,3:last_var])
ggsave("pairs.pdf", plot = p , width = 30 , height = 30 )

## closer look at two variables (Strength and Flair)
## Observe the relationship between attributes Strength and Flair using a scatter plot
qplot(ds$Strength, ds$Flair ,col="red",xlab="Strength",ylab="Flair")
### Alternatively, using ggplot:
p <- ggplot(data=ds, aes(x=Strength, y=Flair)) +
geom_point(shape=21, color="red4", fill="red3" , size=4) + theme_bw()
## add names
p + geom_label(aes(label=Name), size=0.5)
library(ggrepel) ## to plot names without overlap
p + geom_label_repel (aes(label=Name) , size = 2 , fill = "gold3",
alpha = 0.75 )
# jitter to spread overlapped points (label_repel does not follow the jitter)
p2 <- ggplot(data=ds, aes(x=Strength, y=Flair)) +
geom_jitter(shape=21, color="red4", fill="red3", size=4) + theme_bw()
# geom_jitter adds a small amount of random variation to the location of each
# point, and is a useful way of handling overplotting caused by discreteness
# in smaller datasets

k-Means Full Approach


# Check the number of clusters (k) using the elbow method
k_num <- 2:15
num_tries <- 10 ## Run the K-means algorithm 10 times for each k
## vector to keep the average within sum of squares for each k
avg.tot.w.ss <-double(length(k_num))
for( v in k_num){ ## for each value of the range variable
v.tot.w.ss <-double(num_tries) # vector to hold the 10 tries
for(i in 1:num_tries){
k.temp <- kmeans( ds [, 3:last_var ] , centers=v) #Run k-means
v.tot.w.ss[i] <-k.temp$tot.withinss #Store the total withinss
}
avg.tot.w.ss[v-1] <-mean(v.tot.w.ss) #Average the 10 total withinss
}
plot(k_num, avg.tot.w.ss, type="b", main="Total Within SS by Various K",
ylab="Average Total Within Sum of Squares", xlab="Value of K")

# OR Check the number of clusters with the very useful NbClust package
library(NbClust)

## Davies and Bouldin index


NbClust(ds[, c(3:last_var)] ,distance = "euclidean", min.nc = 2,
max.nc = 15, method = "kmeans", index = "db")

#silhouette index
NbClust( ds[ , c(3:last_var ) ] ,distance = "euclidean", min.nc = 2,
max.nc = 15, method = "kmeans", index = "silhouette")

#all indexes
NbClust (ds[,c(3:last_var)] ,distance="euclidean", min.nc=2,
max.nc=20, method="kmeans",index="all")

## silhouette plot
#install.packages("cluster")
library(cluster)
dis <- dist(ds[,3:last_var])^2 # dist() computes and returns the distance matrix
cl <- kmeans(ds[,3:last_var], 2)
sil <- silhouette(cl$cluster, dis)
windows()
plot(sil)
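The average silhouette width summarises the plot in one number:

summary(sil)$avg.width # closer to 1 means better separated clusters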

#chosen number of clusters is 2


last_var <- ncol(ds)-1
k <- kmeans(ds[,3:last_var], centers = 2)

# create a dataframe with the centroids of the 2 clusters


k_centers <- data.frame(k$centers,cl=c(1,2))
k_centers$cl <- as.factor(k_centers$cl)

#attribute that enables to track the cluster assigned


ds <- cbind(ds , k$cluster)
colnames(ds)[ncol(ds)] <- "cluster"
ds$cluster <- factor(ds$cluster)

## parallel coordinates coloured by cluster


library(ggplot2)
library(GGally)
ggparcoord(data = ds , columns = c(3:last_var), groupColumn =
ncol(ds), scale = "globalminmax") + theme_bw()

## see the name in each cluster


ds <- cbind(ds , k$cluster)
colnames(ds)[ncol(ds)] <- "cluster"
ds$cluster <- factor(ds$cluster)
ds[order(ds$cluster), c('Name', 'cluster')]

PAM with k specified


ds_scaled <- cbind(ds[,1:2], scale(ds[ ,-c(1,2,19)]), ds[,19])

library(cluster)
pam_cl <- cluster::pam(ds_scaled[, 4:ncol(ds_scaled)], k=2)

# check and observe medoids


pam_cl$id.med # row indices of the medoids
ds_scaled[pam_cl$id.med,] # e.g. rows 3 and 28 in one particular run

Ensemble PAM and k-means


pam_cl <- cluster::pam(ds_scaled[, 4:ncol(ds_scaled)], k=2)
km_cl <- kmeans(ds_scaled[,4:ncol(ds_scaled)], centers=2)

ds_cl <- data.frame(ds_scaled, pam=pam_cl$clustering, km=km_cl$cluster)

#compare results
table(pam=ds_cl$pam, kmeans=ds_cl$km)
#OR
#length(which(ds_cl$pam==1 & ds_cl$km==1))
#length(which(ds_cl$pam==1 & ds_cl$km==2))
#length(which(ds_cl$pam==2 & ds_cl$km==1))
#length(which(ds_cl$pam==2 & ds_cl$km==2))

PAM without k specified (use silhouette metric)


#install.packages("fpc")
library(fpc)
pamk_cl <- fpc::pamk(ds_scaled[, 4:ncol(ds_scaled)])

##medoids
pamk_cl$pamobject$id.med

table(pam=ds_cl$pam, pamk=pamk_cl$pamobject$clustering)

Fuzzy clustering
f_kmeans <- cluster::fanny(ds_scaled[, 4:ncol(ds_scaled)], k = 2,
memb.exp = 1.3)
### memb.exp close to 1 gives increasingly rigid (crisp) clusterings, whereas
### higher values lead to complete fuzziness

data.frame(cbind(ds_scaled$Name, round(f_kmeans$membership*100,2)))
apply(f_kmeans$membership, 1, sum)

#plotting
#install.packages("factoextra")
library(factoextra)
fviz_cluster(f_kmeans, ellipse.type = "norm", choose.vars =
c("Heading", "Passing"),legend = "right")
Hierarchical clustering
dist_data <- dist(ds_scaled[, 4:ncol(ds_scaled)]) ## check options with ?dist

## try several methods, e.g. "ward.D", "single", "complete", "average", "median"
hc <- hclust(dist_data, method = "single" )

plot(hc, labels = ds_scaled$Name)

rect.hclust(hc, k=3)

groups <- cutree(hc, k=5)


Dataset_cluster<-cbind(Dataset,groups)
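One way to compare linkage methods is the cophenetic correlation between the dendrogram and the original distances:

cor(dist_data, cophenetic(hc)) # closer to 1 = the dendrogram preserves the original distances better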

DBSCAN
## play with the parameters, and decide whether to use the scaled data or not
dsc_cl <- fpc::dbscan(ds_scaled[, 4:ncol(ds)], eps = 3, MinPts = 5)
dsc_cl$cluster

## Plotting clusters resulting from dbscan, considering attributes 'Dribbling' and 'Heading'
plot(dsc_cl, ds_scaled[, c(4,6)], main = "Dribbling vs Heading")

## Plotting clusters resulting from dbscan, considering all attributes
plot(dsc_cl, ds_scaled[, 4:ncol(ds)], main = "DBScan")
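A quick way to inspect the result is the table of cluster sizes (cluster 0 holds the points labelled as noise):

table(dsc_cl$cluster)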

Visualization
library(readr)
library(dplyr)
library(lubridate)

countries <- c( "France","Spain" )


ds_raw <-
read_csv("https://opendata.ecdc.europa.eu/covid19/nationalcasedeath/csv/data.csv",
col_types ="cccncncnncc" )

ds <- ds_raw %>%
filter ( country %in% countries & indicator == 'cases' ) %>% ## select countries
arrange ( year_week ) %>% ## to make sure it is sorted
group_by ( country ) %>%
slice_tail ( n = 30 ) %>%
mutate ( seq = row_number() ) %>% ## sequential number
ungroup () %>%
select ( -source,-note ) %>% ## drop the 'source' and 'note' columns
rename ( cum_count = cumulative_count )

library ( ggplot2 )
p <- ggplot(data = ds, aes(x = seq , y = weekly_count, color =
country))
## in p1 properties are outside aes() and are fixed
p1 <- p + geom_line( size = 2 , linetype = "dashed" , alpha = 0.5)
## in p2 properties depend on the 'country' variable
p2 <- p + geom_line( aes ( linetype = country ) ) + geom_point( aes
( shape = country ) )

## geom_col makes bars whose heights represent values in the data
p + geom_col()

## violin represents a distribution


p + geom_violin( aes ( fill = country ) , color = "black", alpha = 0.5
)

#Faceting - divides a plot into subplots based on the value of discrete variable(s)
p + facet_wrap( ~ country, nrow = 1)

#crop x axis: e.g. adjust the slice_tail( n = 30 ) step in the pipeline above

# scales control the mapping from data to aesthetics


p2 <- p + scale_x_continuous( name = "week", limits = c( 0, 31 ) ,
breaks = c( 1 , 15 , 30 ) )
p3 <- p2 + scale_color_manual( values = c( "Portugal" = "red3" , Spain
= "blue4" , "Germany" = "green4", "France" =
"gold3") )
library( scales )
p4 <- p3 + scale_y_continuous( name = "weekly cases" , labels =
unit_format(unit = "k", scale = 1e-3 ) )
p5 <- p4 + guides ( color = "none" )

p + scale_color_grey()
p + scale_color_brewer( type = "seq" )
p + scale_color_brewer( type = "qual" )
p + scale_color_brewer( type = "div" )
p + scale_color_brewer( palette = "Dark2" )

p5 + theme_bw()
p5 + theme_classic()
p5 + theme_grey()
p5 + theme_minimal()

#saving the plot


ggsave(filename = "plot_p5.png", plot = p5 , width = 15, height = 10,
units = "cm", dpi = 600)
ggsave("plot_p5.pdf", plot = p5 , width = 15, height = 10, units =
"cm")

Association rules
# define several columns as factors in one line
data[,1:ncol(data)]<-lapply(data[,1:ncol(data)], as.factor)

#install.packages("arules")
library(arules)

# find frequent itemsets


frequent_sets<- apriori(data, parameter = list(support=0.5, target =
"frequent itemsets"))
inspect(sort(frequent_sets, by = 'support'))
# find association rules. Set minlen=2 to avoid rules with only one item
rules<- apriori(data, parameter = list(support=0.4, confidence = 0.6,
minlen=2))
inspect(rules)

#plot
library(arulesViz)
plot(rules, method = "graph", engine = "htmlwidget")

# Generate the rules, with 2 itemsets, that lead to column 'pep' = NO (pep = Personal Equity Plan)
no_pep_rules <- apriori(toy[,-c(1,ncol(toy))],
parameter = list(support=0.5, minlen=2, maxlen=2),
appearance = list(rhs="pep=NO", default="lhs")) # default="lhs": other items may only appear on the left-hand side
inspect(no_pep_rules)

Example Exam 1
1. Start by writing the code that enables you to read the 'sales_data.txt' dataset file and to set
the appropriate data type.
library(readr)
sales_data = read_csv('sales_data.txt')
sales_data$Prod = as.factor(sales_data$Prod)

2. Write the code that allows to compute how many reports do not include both the respective
Quantity and Value.

aux <- apply(sales_data[, c('Quant','Val')], 1, function(x) sum(is.na(x)))
length(aux[aux>0])
OR
Df = sales_data[which(is.na(sales_data$Quant) & is.na(sales_data$Val)),]
nrow(Df)

3. Create a new dataset excluding those reports (those that do not include both the respective
Quantity and
Value).

Df = sales_data[which(aux==0),]
Df = sales_data[-which(is.na(sales_data$Quant) & is.na(sales_data$Val)),]

4. In order to obtain a deeper knowledge on the new dataset, construct a barplot that depicts
the number of
reports per product (use the new dataset).
library(ggplot2)
p <- ggplot(data = Df, aes(x = Prod)) + geom_bar()

5. There are two products (Prod=2442 and Prod=2443) that have all their transactions with
unknown values of the quantity. Exclude from the new dataset all records refereeing to these
products.
Df = Df[-which((Df$Prod=="2442") | (Df$Prod=="2443")),]

6. Given the different quantity of products that are sold on each transaction, it is more correct
to center the
analysis on the unit price rather than on the total monetary value of the sale (Val). Add a new
column to the
dataframe (obtained in the last question) with the unit price and keep the Val column.
Df$Up = Df$Val / Df$Quant

7. You are considering to replace the missing values on the unit price by the average price of the
corresponding product. Create a new object with the mean prices by product.
mean_price_by_prod <- tapply(Df$Up, Df$Prod, mean, na.rm = TRUE)

8. Since you were not sure how to use the table just created to fill in the missing values, you
decided to replace the missing values on the unit price by the global median unit price. Write
the code for this purpose.
Df[is.na(Df$Up), "Up"] <- median(Df$Up, na.rm = T)

9. For those reports in which the quantity is not known, infer it based on the unit price and total
value.
Df[is.na(Df$Quant), "Quant"] <- Df$Val[is.na(Df$Quant)] / Df$Up[is.na(Df$Quant)]

10. Exclude the column Val of the dataset.
Df$Val = NULL

11. Write the code that enables to run a classification model based on k-NN (k=2). Use all data
records with a known target value as training data and predict the unknown target values.

sales_processed$X<-NULL
sales_processed$Prod<-as.factor(sales_processed$Prod)

#Min-max scaling
new_max<-1
new_min<-0
norm <- function(x) {(x - min(x, na.rm=TRUE))/(max(x,na.rm=TRUE) -
min(x, na.rm=TRUE))*((new_max-new_min)+new_min)}

sales_processed[,-c(1,2,3,5)] <- apply(sales_processed[,-


c(1,2,3,5)],2, norm)

# knn() needs numeric inputs, so categorical/binary variables must be dropped
library(FNN)

train = sales_processed[-which(sales_processed$Insp == 'unkn'),]
test = sales_processed[which(sales_processed$Insp == 'unkn'),]

predictions_summary<-data.frame()

Model_knn<-knn(train[,-c(1,8)], test[,-c(1,8)], cl=train[,8], k = 2,
prob=TRUE)

prob_predictions=ifelse(Model_knn==1,attr(Model_knn, 'prob'),1-
attr(Model_knn, 'prob'))

# This is necessary because "if prob is true, the proportion of
# the votes for the winning class are returned as attribute prob."
# To compute the AUC we need the probability corresponding to the positive class

predictions_summary<-rbind(predictions_summary,
data.frame(ID=test[,1], RISK=test$RISK, pred=prob_predictions))

predictions_summary$RISK<-
as.numeric(as.character(predictions_summary$RISK))
auc_list<-auc(predictions_summary$RISK, predictions_summary$pred,
direction="<")

auc_list

12. Write the code that enables to identify the top 10% reports in terms of probability of fraud.
Those reports will be part of a rigorous control.

predictions_summary_ordered<-predictions_summary[order(-predictions_summary$pred),]
predictions_summary_ordered[1:round(nrow(test)/10),]

Exam example 2
1. Start by writing the code that enables you to read the dataset file and to set the appropriate
data type.
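No answer is given above; a sketch, assuming a hypothetical file name and that Artist should be treated as a factor:

Data <- read.csv('music_data.csv', header = TRUE, stringsAsFactors = FALSE) # hypothetical file name
Data$Artist <- as.factor(Data$Artist)
str(Data) # check the type assigned to each column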

2. Write the code that allows to estimate the standard deviation for each variable (if possible,
at once).
df_sd = apply(df, 2, sd) # numeric columns only

3. Write the code that enables to add a new column with the music style.
Dataset_ArtistStyle<-cbind(c("Abba","Beatles","Eels","Vivaldi",
"Mozart","Beethoven","Enya"),c("Rock","Rock","Rock","Classical",
"Classical","Classical","New Wave"))
colnames(Dataset_ArtistStyle)[1:2]<-c("V1","Music_Style")
Dataset2=merge(Dataset,Dataset_ArtistStyle,by.x = "Artist",by.y
= "V1",all.x = TRUE)
View(Dataset2)

4. Write the code that enables to estimate the average of “LVar” for each music style (if
possible at once).
tapply(Dataset2$LVar, Dataset2$Music_Style, mean)

5. Write the code that enables to normalize the numeric data using a Z-score approach (if
possible at once).

data[,2:6] <-
apply(data[,2:6], 2 , function(x) {(x - mean(x,
na.rm=TRUE))/(sd(x,na.rm=TRUE))} )

6. Write the code that enables to create a sample of 80% of the original data. This sample

p=0.8
data_sample <- data.frame()
for (i in levels(data$origin)){
data_sub <- data[data$origin==i,]
B = ceiling(nrow(data_sub)*p) #rounds up
data_sub_aux <- data_sub[sample(1:nrow(data_sub),B),]
data_sample <- rbind(data_sample,data_sub_aux)
}
BUT TO HAVE BOTH 80% AND 20%

set.seed(5)
p = 0.2
Data$Artist<-factor(Data$Artist)
Test_sample <- data.frame()
Train_sample <- data.frame()

for(i in levels(Data$Artist)) {
dsub <- Data[Data$Artist == i,]
B = ceiling(nrow(dsub) * p)
C = sample(1:nrow(dsub), B)
dsub_1 <- dsub[C, ]
dsub_2 <- dsub[-C,]
Test_sample <- rbind(Test_sample, dsub_1)
Train_sample <- rbind(Train_sample, dsub_2)
}
Data_train<-Train_sample

7. Write the code that allows to assign the musics of the sample obtained to one of five
clusters constructed using a hierarchical clustering with average link distance. Please consider
only the numeric variables.

dist_data <- dist(ds_scaled[, 4:ncol(ds_scaled)]) ## check options with ?dist

## try several methods, e.g. "ward.D", "single", "complete", "average", "median"
hc <- hclust(dist_data, method = "average" )

plot(hc, labels = ds_scaled$Name)

rect.hclust(hc, k=5)
groups <- cutree(hc, k=5)
Dataset_cluster<-cbind(Dataset,groups)

Example exam 3
3. number of visits and total amount spent per customer. Cluster according to these
Amount = tapply(data$UnitPrice*data$Quantity, data$CustomerID, sum) # total amount spent
Visits = tapply(data$InvoiceNo, data$CustomerID, function(x)
length(unique(x))) # number of distinct invoices = visits
Clustering = kmeans(data.frame(Amount, Visits),5)

4. discretize with equal width to create bins


library(infotheo)
discretize(algae$Cl, disc="equalwidth", nbins=5)

5. use boxplot to identify outliers of total amount
length(boxplot(data$UnitPrice*data$Quantity)$out)
6. STRATIFIED SAMPLING WITH HYPERPARAMETER TUNING OF DT AND AUC FOR PREDICTING
IF QUANTITY IS HIGHER THAN 100

set.seed(5)
p1 = 0.6
p2 = 0.2
data$dep = 0
data$dep[which(data$Quantity > 100)] = 1
data$dep = as.factor(data$dep)

Test_sample <- data.frame()


Train_sample <- data.frame()
Validation_sample <- data.frame()

for(i in levels(data$dep)) {
dsub <- data[data$dep == i,]
B1 = ceiling(nrow(dsub) * p1)
B2 = ceiling(nrow(dsub) * p2)
C1 = sample(1:nrow(dsub), B1)
dsub_1 <- dsub[C1, ]
dsub_2 <- dsub[-C1,]
C2 = sample(1:nrow(dsub_2), B2)
dsub_3 <- dsub_2[C2,]
dsub_4 <- dsub_2[-C2,]
Train_sample <- rbind(Train_sample, dsub_1) # 60% for training
Validation_sample <- rbind(Validation_sample, dsub_3) # 20% for validation
Test_sample <- rbind(Test_sample, dsub_4) # remaining 20% for testing
}

library(rpart)
Model_information<-rpart(dep~
StockCode+UnitPrice+CustomerID+Country,data=Train_sample, method = "class",
parms = list(split = "information"))

Model_gini<-rpart(dep~
StockCode+UnitPrice+CustomerID+Country,data=Train_sample, method = "class",
parms = list(split = "gini"))

Prediction_information <- predict(Model_information, newdata=Validation_sample)

Prediction_gini <- predict(Model_gini, newdata=Validation_sample)

library(pROC)
auc_list_info<-auc(as.numeric(as.character(Validation_sample$dep)),
Prediction_information[,'1'], direction='<')
auc_list_info

auc_list_gini<-auc(as.numeric(as.character(Validation_sample$dep)),
Prediction_gini[,'1'], direction='<')
auc_list_gini

if (auc_list_info > auc_list_gini){
prediction <- predict(Model_information, newdata=Test_sample)
auc <- auc(as.numeric(as.character(Test_sample$dep)), prediction[,'1'],
direction='<')
} else {
prediction <- predict(Model_gini, newdata=Test_sample)
auc <- auc(as.numeric(as.character(Test_sample$dep)), prediction[,'1'],
direction='<')
}
7. Write the R code that enables to ignore observations with missing values and then create a
graph that shows the relationship between the Quantity and the UnitPrice. The Country of the
transactions should be represented by the color of the observations
new_data <-na.omit(new_data)
library(ggplot2)
ggplot(data, aes(x= UnitPrice, y= Quantity,colour= Country)) +
geom_point()
plot(data$UnitPrice, data$Quantity, col=data$Country)

8. Write the R code that enables to replace the Description values starting by “?” by NA
data[which(substr(data$Description,1,1)=="?"),"Description"]<-NA # which() avoids problems with NA Descriptions

9. Write the R code that enables to know how many rows do not have a CustomerId
associated, i.e. are missing values
sum(is.na(data$CustomerID))

10. Write the R code that calculates simultaneously the median of the variables Quantity and
UnitPrice, per country
sapply(c('Quantity','UnitPrice'), function(x) tapply(data[,x],
data$Country, median))

PART 1
1.1 Write the code to load the datasets and classify each variable according to their type.

1.2 Write the code to include a variable that identifies which type of wine refers (red or white)
each observation, and integrate the two datasets.

1.3 Assume that the “alcohol” is dependent on the "volatileacidity" and "citricacid". Write the
code to infer the missing values of “alcohol” variable considering a linear regression.

1.4 Write the code to estimate the mean of "fixedacidity" and "volatileacidity" for each quality
level. Try to use one single code statement.

1.5 Write the code that uses the complete dataset to train an ensemble classification algorithm
that enables to predict the quality of the wine, using the remaining attributes. For this purpose
use oneR algorithm and decision tree algorithm (with default parameters). Estimate the
training error of this ensemble in order to measure the overfitting.

#part I
#1
setwd("C:/Users/afons/Downloads/AE_Normal_2022")

red <- read.csv('winequality-red.csv',header = T,sep = ";",dec = ".",stringsAsFactors = F)


white <- read.csv('winequality-white.csv',header = T,sep = ";",dec = ".",stringsAsFactors = F)

#2
red$type = 'red'
white$type = 'white'

colnames(red)[12] = 'quality'

wine = rbind(red,white)
wine$type = as.factor(wine$type)

#3
length(which(is.na(wine$volatileacidity)))
length(which(is.na(wine$citricacid)))

model<-lm(alcohol ~ volatileacidity+citricacid, data = wine[-which(is.na(wine$alcohol)),])

wine[which(is.na(wine$alcohol)),'alcohol'] = (model[["coefficients"]][1] +
model[["coefficients"]][2]*wine[which(is.na(wine$alcohol)),'volatileacidity'] +
model[["coefficients"]][3]*wine[which(is.na(wine$alcohol)),'citricacid'])

#4
sapply(c("fixedacidity","volatileacidity"),function(x)tapply(wine[,x], wine$quality, median))

#5
#install.packages("OneR")
library('OneR')
library(rpart)
wine$quality = as.character(wine$quality)
wine$quality = as.factor(wine$quality)

Model_OneR<-OneR(wine[,-c(13)])
prediction_OneR<-predict(Model_OneR, wine[,-c(13)])

Model_tree<-rpart(quality~.,data=wine[,-c(13)], parms = list(split = 'gini'))

prediction_tree<-predict(Model_tree, wine[,-c(13)])

prediction_DT = colnames(prediction_tree)[apply(prediction_tree,1,which.max)]
prediction_DT = as.factor(prediction_DT)

table(prediction_OneR, prediction_DT)

table(prediction_OneR, wine$quality)
1 - (612 + 2419 + 24) / nrow(wine)

table(prediction_DT, wine$quality)
1 - (1338 + 2107) / nrow(wine)
PART 2
Using the dataset that integrate the records for both white and red wine answer the following
questions.

2.1 Use ggplot to create a set of parallel boxplots showing the alcohol level for the different
levels of quality. Write a sentence commenting the plot.

2.2 Create a scatter plot having 'citricacid' in the x-axis and ' volatileacidity' in the y-axis. Color
the points by the wine type (red or white). Do something to deal with the points that overlap.
Write a sentence commenting the plot.

2.3 Use the k-means clustering algorithm to analyze if there are groups of records that can be
induced from the values of the chemical variables. Justify your choice for the number of
clusters and any other step in the analysis that you think was necessary. Write a sentence
commenting the results.

# Part 2
#1
library(ggplot2)
p <- ggplot(data = wine, aes(x = alcohol, color = quality))
p + geom_boxplot()
# From quality 3 to 6 the alcohol level seems to decrease more significantly.
# Then from 6 to 8, the opposite pattern.

#2
p1 <- ggplot(data = wine, aes(x = citricacid, y = volatileacidity, color = type))
p1 + geom_point() + facet_wrap(~type)
# to deal with overlap, plot is divided into subplots with facet_wrap

#red and white have symmetric behaviour regarding volatileacidity and citricacid
#red tends to have higher values of volatileacidity
#while white has higher values for citricacid
#in fact even the outliers of both show this pattern
#nevertheless both present a really wide range, with linearity not being an adequate assumption

#3

#It would be interesting to identify clusters either related to the quality level
# or to the type of wine

#To check the number of clusters, the very useful NbClust package will be used
library(NbClust)

## Davies and Bouldin index


NbClust(wine[, -c(12,13)] ,distance = "euclidean", min.nc = 2, max.nc = 10, method = "kmeans",
index = "db")

#silhouette index
NbClust( wine[, -c(12,13)] ,distance = "euclidean", min.nc = 2, max.nc = 10, method =
"kmeans", index = "silhouette")
#Both yield good results for 2 and 3 clusters

## silhouette plot
#install.packages("cluster")
library(cluster)
dis <- dist(wine[, -c(12,13)])^2 #dist() computes and returns the distance matrix
cl <- kmeans(wine[, -c(12,13)], 3)
sil <- silhouette(cl$cluster, dis)
windows()
plot(sil)

#An alternative would be to use the elbow method

k_num <- 2:10


num_tries <- 10 ## Run the K-means algorithm 10 times for each k

## vector to keep the average within sum of squares for each k


avg.tot.w.ss <-double(length(k_num))
for( v in k_num){ ## for each value of the range variable
v.tot.w.ss <-double(num_tries) # vector to hold the 10 tries
for(i in 1:num_tries){
k.temp <- kmeans( wine[, -c(12,13)] , centers=v) #Run k-means
v.tot.w.ss[i] <-k.temp$tot.withinss #Store the total withinss
}
avg.tot.w.ss[v-1] <-mean(v.tot.w.ss) #Average the 10 total withinss
}
plot(k_num, avg.tot.w.ss, type="b", main="Total Within SS by Various K", ylab="Average Total
Within Sum of Squares", xlab="Value of K")

#from 2 to 3 there is a big gap in the total within SS

#chosen number of clusters is 2 to identify type of wine


k <- kmeans(wine[, -c(12,13)], centers = 2)

# create a dataframe with the centroids of the 2 clusters


k_centers <- data.frame(k$centers,cl=c(1,2))
k_centers$cl <- as.factor(k_centers$cl)

#attribute that enables to track the cluster assigned


wine <- cbind(wine , k$cluster)
colnames(wine)[ncol(wine)] <- "cluster"
wine$cluster <- factor(wine$cluster)

## parallel coordinates coloured by cluster


library(GGally)
ggparcoord(data = wine , columns = c(1:11), groupColumn = ncol(wine), scale =
"globalminmax") + theme_bw()

table(wine$type,wine$cluster)
