[toc]python
最近因为老板有分析项目,我实在是进展缓慢,一直苦恼并艰难的探索和进展,因此很长时间没有和你们见面了,今天我为你们带来的source tracker分析,使用前一段时间刚出来的工具FEAST。git
刘老师对这片文章进行了详细的解读: Nature Methods:快速准确的微生物来源追溯工具FEAST。跟着刘老师的步伐,今天我对这个工具进行一个尝试。为何做者不将这个工具封装到R包呢这样不就更容易了吗?可能好多小伙伴都没有从github上克隆过项目。github
不单单是这一次,我在以后所有的分析都会将整个群落封装到phylsoeq,只是为了更好的更加灵活的对微生物群落数据进行分析,固然你们若是初次见面,可能须要安装依赖极多的phyloseq包。须要熟悉phylsoeq封装的结构和调用方法。微信
为了让你们更容易操做,我把数据保存为csv,方便还没有解除phylsoeq的小伙伴进行无压力测试。网络
FEAST提供两种方式来作微生物来源分析。函数
首先咱们来演示基于单个目标样品和来源样品的来源分析工具
# rm(list = ls()) # gc() path = "./phyloseq_7_source_FEAST" dir.create(path) ##导入主函数 source("./FEAST-master/FEAST_src//src.R") ps = readRDS("./a3_DADA2_table/ps_OTU_.ps") # 导入分组文件和OTU表格 metadata <- as.data.frame(sample_data(ps)) head(metadata) write.csv(metadata,"metadata.csv",quote = F) # Load OTU table vegan_otu <- function(physeq){ OTU <- otu_table(physeq) if(taxa_are_rows(OTU)){ OTU <- t(OTU) } return(as(OTU,"matrix")) } otus <- as.data.frame(t(vegan_otu(ps))) write.csv(otus,"otus.csv",quote = F) otus <- t(as.matrix(otus)) ###下面区分目标样品和来源样品。 envs <- metadata$SampleType metadata<- arrange(metadata, SampleType) metadata$id = rep(1:6,4) Ids <- na.omit(unique(metadata$id)) it = 1 train.ix <- which(metadata$SampleType%in%c("B","C","D")& metadata$id == Ids[it]) test.ix <- which(metadata$SampleType=='A' & metadata$id == Ids[it]) # Extract the source environments and source/sink indices num_sources <- length(train.ix) #number of sources COVERAGE = min(rowSums(otus[c(train.ix, test.ix),])) #Can be adjusted by the user #对两组样品进行抽平 sources <- as.matrix(rarefy(otus[train.ix,], COVERAGE)) sinks <- as.matrix(rarefy(t(as.matrix(otus[test.ix,])), COVERAGE)) dim(sinks) print(paste("Number of OTUs in the sink sample = ",length(which(sinks > 0)))) print(paste("Seq depth in the sources and sink samples = ",COVERAGE)) print(paste("The sink is:", envs[test.ix])) # Estimate source proportions for each sink EM_iterations = 1000 # number of EM iterations. default value FEAST_output<-FEAST(source=sources, sinks = t(sinks), env = envs[train.ix], em_itr = EM_iterations, COVERAGE = COVERAGE) Proportions_est <- FEAST_output$data_prop[,1] names(Proportions_est) <- c(as.character(envs[train.ix]), "unknown") print("Source mixing proportions") Proportions_est round(Proportions_est,3)
基于多个目标和来源的微生物来源分析: different_sources_flags设置目标样品和来源样品的对应关系。是否不一样目标对应不一样来源样品,仍是不一样目标对应相同来源样品学习
##导入主函数 source("./FEAST-master/FEAST_src//src.R") ps = readRDS("./a3_DADA2_table/ps_OTU_.ps") # 导入分组文件和OTU表格 metadata <- as.data.frame(sample_data(ps)) head(metadata) # Load OTU table vegan_otu <- function(physeq){ OTU <- otu_table(physeq) if(taxa_are_rows(OTU)){ OTU <- t(OTU) } return(as(OTU,"matrix")) } otus <- as.data.frame(t(vegan_otu(ps))) otus <- t(as.matrix(otus)) head(metadata) metadata<- arrange(metadata, SampleType) metadata$id = rep(1:6,4) EM_iterations = 1000 #default value different_sources_flag = 1 envs <- metadata$SampleType Ids <- na.omit(unique(metadata$id)) Proportions_est <- list() it = 1 for(it in 1:length(Ids)){ # Extract the source environments and source/sink indices if(different_sources_flag == 1){ train.ix <- which(metadata$SampleType%in%c("B","C","D")& metadata$id == Ids[it]) test.ix <- which(metadata$SampleType=='A' & metadata$id == Ids[it]) } else{ train.ix <- which(metadata$SampleType%in%c("B","C","D")) test.ix <- which(metadata$SampleType=='A' & metadata$id == Ids[it]) } num_sources <- length(train.ix) COVERAGE = min(rowSums(otus[c(train.ix, test.ix),])) #Can be adjusted by the user # Define sources and sinks sources <- as.matrix(rarefy(otus[train.ix,], COVERAGE)) sinks <- as.matrix(rarefy(t(as.matrix(otus[test.ix,])), COVERAGE)) print(paste("Number of OTUs in the sink sample = ",length(which(sinks > 0)))) print(paste("Seq depth in the sources and sink samples = ",COVERAGE)) print(paste("The sink is:", envs[test.ix])) # Estimate source proportions for each sink FEAST_output<-FEAST(source=sources, sinks = t(sinks), env = envs[train.ix], em_itr = EM_iterations, COVERAGE = COVERAGE) Proportions_est[[it]] <- FEAST_output$data_prop[,1] names(Proportions_est[[it]]) <- c(as.character(envs[train.ix]), "unknown") if(length(Proportions_est[[it]]) < num_sources +1){ tmp = Proportions_est[[it]] Proportions_est[[it]][num_sources] = NA Proportions_est[[it]][num_sources+1] = tmp[num_sources] } print("Source mixing proportions") print(Proportions_est[[it]]) } print(Proportions_est) went = as.data.frame(Proportions_est) colnames(went) = paste("repeat_",unique(metadata$id),sep = "") head(went) filename = paste(path,"/FEAST.csv",sep = "") write.csv(went,filename,quote = F)
library(RColorBrewer) library(dplyr) library(graphics) head(went) plotname = paste(path,"/FEAST.pdf",sep = "") pdf(file = plotname,width = 12,height = 12) par(mfrow=c((length(unique(metadata$SampleType))%/%2 +2 ),2), mar=c(1,1,1,1)) # layouts = as.character(unique(metadata$SampleType)) for (i in 1:length(colnames(went))) { labs <- paste0(row.names(went)," \n(", round(went[,i]/sum(went[,i])*100,2), "%)") pie(went[,i],labels=labs, init.angle=90,col = brewer.pal(nrow(went), "Reds"), border="black",main =colnames(went)[i] ) } dev.off()
咱们做为生物可能测定9个以上重复了,若是展现九个饼图,那就显得太夸张了,求均值,展现均值饼图测试
head(went) asx = as.data.frame(rowMeans(went)) asx = as.matrix(asx) asx_norm = t(t(asx)/colSums(asx)) #* 100 # normalization to total 100 head(asx_norm) plotname = paste(path,"/FEAST_mean.pdf",sep = "") pdf(file = plotname,width = 6,height = 6) labs <- paste0(row.names(asx_norm)," \n(", round(asx_norm[,1]/sum(asx_norm[,1])*100,2), "%)") pie(asx_norm[,1],labels=labs, init.angle=90,col = brewer.pal(nrow(went), "Reds"), border="black",main = "mean of source tracker") dev.off()