# Set the working directory to the folder where the input data are stored.
# Every relative path used below is resolved against this directory.
setwd("~/tmpBio/")

# Read the input data from the bracelet analysis and the questionnaire.
# Both files are tab-separated with a header row; the first column of each
# file is used as the row names.
PAHCompounds <- read.table(
  file = "./PAHCompounds", sep = "\t", header = TRUE, row.names = 1
)
SamplesFeatures <- read.table(
  file = "./SamplesFeatures", sep = "\t", header = TRUE, row.names = 1
)
SamplesFeatures
PAHCompounds

# Rows can be selected (and reordered) by position or by row name.
# drop = FALSE keeps the result a data frame even if there is only one column.
SamplesFeatures[c(2, 1), , drop = FALSE]
SamplesFeatures[c("WB2", "WB1"), , drop = FALSE]

# Put the rows of SamplesFeatures in the same order as the rows of
# PAHCompounds. Indexing with a row name that is not found yields a row
# filled with NA instead of an error, so report unmatched samples explicitly.
if (!all(rownames(PAHCompounds) %in% rownames(SamplesFeatures))) {
  warning(
    "Samples in PAHCompounds without an exactly matching row name in ",
    "SamplesFeatures: ",
    paste(
      setdiff(rownames(PAHCompounds), rownames(SamplesFeatures)),
      collapse = ", "
    ),
    call. = FALSE
  )
}
SamplesFeaturesSort <- SamplesFeatures[rownames(PAHCompounds), , drop = FALSE]
SamplesFeaturesSort
# Inspect the data: per-column summary of SamplesFeatures.
sumsamples <- summary(SamplesFeatures)
sumsamples
# Exercise: do the same for PAHCompounds.

# Build the data for the pie plot: count the samples in each Sex category
# and label every slice with the category name and its count on two lines.
mytable <- table(SamplesFeatures$Sex)
lbls <- paste0(names(mytable), "\n", mytable)
pie(
  mytable,
  labels = lbls,
  main = "Pie Chart of Sex distribution \n (with sample sizes)"
)
# Exercise: do the same for the other columns.
# Visualise the data: one box per PAH compound, side by side.
# Each column is first copied into its own vector.
Fluorene     <- PAHCompounds$Fluorene
Phenantrene  <- PAHCompounds$Phenantrene
Fluoranthene <- PAHCompounds$Fluoranthene
Pyrene       <- PAHCompounds$Pyrene

# The labels and colours are given in the same order as the data vectors.
boxplot(
  Fluorene, Phenantrene, Fluoranthene, Pyrene,
  main = "PAH compounds",
  names = c("Fluorene", "Phenantrene", "Fluoranthene", "Pyrene"),
  col = c("orange", "green", "blue", "red")
)
# Test of the data distribution.
# One-sample Kolmogorov-Smirnov test: compares the Fluorene measurements with
# a normal distribution whose mean and standard deviation are taken from the
# same measurements.
# na.rm = TRUE keeps the reference parameters defined when some measurements
# are missing; ks.test() itself already discards missing values from the
# sample, so without it a single NA would make mean and sd NA.
# NOTE(review): the ks.test documentation states that the parameters of the
# reference distribution must be pre-specified, not estimated from the data.
# Because they are estimated here, treat the p-value as approximate.
ks.test(
  PAHCompounds$Fluorene,
  "pnorm",
  mean = mean(PAHCompounds$Fluorene, na.rm = TRUE),
  sd = sd(PAHCompounds$Fluorene, na.rm = TRUE)
)
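#D is the value of the K-S test statistic: the maximum absolute difference between the empirical cumulative distribution function of the sample (x) and the cumulative distribution function of the reference distribution (y). On its own it is less informative than the p-value.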
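#p-value is the important number. The smaller it is, the stronger the evidence against the null hypothesis that the sample (x) follows the reference distribution (y). (More technically, it is the probability of obtaining a test statistic at least as extreme as the one observed in your data, assuming that the null hypothesis x=y is true.)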
#alternative hypothesis=two-sided. The alternative hypothesis is denoted H1. A one-sided alternative hypothesis, for example, would be x