# Exploratory analysis of PAH compound measurements (bracelet analysis) and
# the accompanying sample questionnaire.
#
# Inputs (tab-separated, header row, first column used as row names),
# both read relative to the working directory set below:
#   ./PAHCompounds    - PAH compound measurements per sample
#   ./SamplesFeatures - questionnaire features per sample
#
# NOTE: this script was previously collapsed onto a single line starting with
# `#`, which made R treat the whole thing as one comment; statements are now
# back on their own lines so they actually run.

# Set the correct path where the data are stored.
# NOTE(review): setwd() in a script is fragile (it changes global state and
# hard-codes a user-specific path); kept because the relative paths below,
# and possibly later code, depend on it.
setwd("~/tmpBio/")

# Reading input data from bracelet analysis and questionnaire.
PAHCompounds <- read.table(
  file = "./PAHCompounds",
  sep = "\t",
  header = TRUE,
  row.names = 1
)
SamplesFeatures <- read.table(
  file = "./SamplesFeatures",
  sep = "\t",
  header = TRUE,
  row.names = 1
)

# Print both tables for a first look.
SamplesFeatures
PAHCompounds

# Rows can be selected (and reordered) by position or by row name.
SamplesFeatures[c(2, 1), ]
SamplesFeatures[c("WB2", "WB1"), ]

# Put the rows of SamplesFeatures in the same order as the rows of
# PAHCompounds by indexing with PAHCompounds' row names.
SamplesFeaturesSort <- SamplesFeatures[rownames(PAHCompounds), ]
SamplesFeaturesSort

# Inspection of the data.
sumsamples <- summary(SamplesFeatures)
sumsamples
#do the same for PAHCompounds

# Creating data for pie plot: counts per level of Sex, with each slice
# labelled "<level>\n<count>".
mytable <- table(SamplesFeatures$Sex)
lbls <- paste0(names(mytable), "\n", mytable)
pie(
  mytable,
  labels = lbls,
  main = "Pie Chart of Sex distribution \n (with sample sizes)"
)
#do the same for other columns

# Visualization of the data: one box per compound.
Fluorene <- PAHCompounds$Fluorene
Phenantrene <- PAHCompounds$Phenantrene
Fluoranthene <- PAHCompounds$Fluoranthene
Pyrene <- PAHCompounds$Pyrene
boxplot(
  Fluorene, Phenantrene, Fluoranthene, Pyrene,
  main = "PAH compounds",
  names = c("Fluorene", "Phenantrene", "Fluoranthene", "Pyrene"),
  col = c("orange", "green", "blue", "red")
)

# Test of data distribution.
# Kolmogorov-Smirnov test: one-sample test of Fluorene against a normal
# distribution whose mean and sd are taken from the sample itself.
# NOTE(review): the ks.test documentation states that for the one-sample test
# the distribution parameters must be pre-specified and not estimated from the
# data; with estimated parameters the reported p-value is not valid as-is.
# Consider a test designed for this case (e.g. shapiro.test) - confirm with
# the analysis owner.
ks.test(
  PAHCompounds$Fluorene,
  "pnorm",
  mean = mean(PAHCompounds$Fluorene),
  sd = sd(PAHCompounds$Fluorene)
)
# D is the value of the K-S test statistic: the maximum absolute difference
# between the empirical cumulative distribution function of the sample and the
# cumulative distribution function it is compared against (here the normal
# distribution given by "pnorm"). Not that important on its own.
# p-value is the important number. The smaller it is, the less plausible the
# null hypothesis that the sample follows the reference distribution. (More
# technically, it is the probability of obtaining a test statistic at least as
# extreme as the one in your data, assuming the null hypothesis is true.)
# alternative hypothesis=two-sided. The alternative hypothesis is denoted H1. A one-sided alternative hypothesis, for example, would be x