使用R语言包处理芯片cel格式数据 全自动R代码

26次阅读
没有评论

共计 937 个字符,预计需要花费 3 分钟才能阅读完成。

>source(”http://bioconductor.org/biocLite.R”)
>biocLite()
全自动。只需要设置工作路径以及 filelist.txt 文件即可。
filelist.txt,必须与 CEL 文件一起放置在工作目录内,文件以空格为间隔,分两列,分别是 filename 和 factor 例

filename factor
MS1_(Mouse430_2).cel control
MS2_(Mouse430_2).cel control
MS3_(Mouse430_2).cel control
MS4_(Mouse430_2).cel beta-catenin
MS5_(Mouse430_2).cel beta-catenin
MS6_(Mouse430_2).cel beta-catenin
MS7_(Mouse430_2).cel TCF4
MS8_(Mouse430_2).cel TCF4
MS9_(Mouse430_2).cel TCF4
MS10_(Mouse430_2).cel Icat
MS11_(Mouse430_2).cel Icat
MS12_(Mouse430_2).cel Icat

source(“http://pgfe.umassmed.edu/ou/bioconductor/affyChipAna.R”)
setwd(“~/Documents/Rscript/testData”)
doAll()

更多参数:
dataInput(file=”filelist.txt”,sep=”“,rnames=”filename”)

getExpr(data)

QCreport(data,fastQCfile=”fastQC.pdf”,arrayQMfile=”QCreport”)

getDesign(data)

getContrastMatrix(data,design,constr=NULL)

doAnalyze(data,design,contrast.matrix)

outputData(data,fit2,contrast.matrix,times=2,cut.p.val=0.01,cut.fdr.val=NULL,outputFolder=”output”,all=FALSE,scale=”row”,cluster_rows=TRUE,cluster_cols=TRUE)

doAll(file=”filelist.txt”,workingdir=”.”,sep=”“,rnames=”filename”,fastQCfile=”fastQC.pdf”,arrayQMfile=”QCreport”,
constr=NULL,times=2,cut.p.val=0.01,cut.fdr.val=NULL,outputFolder=”output”,
all=FALSE,scale=”row”,doQC=TRUE,cluster_rows=TRUE,cluster_cols=TRUE)

可以指定 design & contrast.matrix: design and contrast.matrix for limma, coef: report contrast, times: folder-change, cut.fdr.val: P.adj.Val cutoff value

source:
doAll 原代码
# source(“http://bioconductor.org/biocLite.R”)
# biocLite(c(“affy”,”annotate”,”annaffy”,”affyQCReport”,”arrayQualityMetrics”,”limma”,”pheatmap”))

dataInput<-function(file=”filelist.txt”,sep=” “,rnames=”filename”){
require(limma,quietly=TRUE)
require(affy,quietly=TRUE)
require(annotate,quietly=TRUE)
targets<-readTargets(file=file,sep=sep,row.names=rnames) #limma
data <- ReadAffy(filenames=targets$filename) #affy
affydb<-annPkgName(data@annotation,type=”db”)
list(“targets”=targets,”data”=data,”affydb”=affydb)
}
getExpr<-function(data){
require(affy,quietly=TRUE)
require(annotate,quietly=TRUE)
require(annaffy,quietly=TRUE)
require(data$affydb,character.only = TRUE,quietly=TRUE)
data$eset<-rma(data$data,verbose=FALSE)
try({
APInfo<-mas5calls(data$data,verbose=FALSE)
present.call<-exprs(APInfo)
})
eset.e<-exprs(data$eset)
if(exists(“present.call”)){
data$RPMA<-merge(eset.e,present.call,by=”row.names”)
colnames(data$RPMA)<-make.names(gsub(“_\(.*?\)\.CEL”,””,colnames(data$RPMA),ignore.case=TRUE))
colnames(data$RPMA)<-gsub(“\.y$”,”.present”,colnames(data$RPMA))
colnames(data$RPMA)<-gsub(“\.x$”,”.rma”,colnames(data$RPMA))
}else{
colnames(eset.e)<-paste(colnames(eset.e),”rma”,sep=”.”)
Row.names=rownames(eset.e)
data$RPMA<-cbind(Row.names,eset.e)
data$RPMA<-data.frame(data$RPMA)
}
data$symbols<-as.character(aafSymbol(as.character(data$RPMA$Row.names),data$affydb))
names(data$symbols)<-as.character(data$RPMA$Row.names)
data$genes<-as.character(aafUniGene(as.character(data$RPMA$Row.names),data$affydb))
names(data$genes)<-as.character(data$RPMA$Row.names)
data
}
QCreport<-function(data,fastQCfile=”fastQC.pdf”,arrayQMfile=”QCreport”){
require(affyQCReport,quietly=TRUE)
require(arrayQualityMetrics,quietly=TRUE)
QCReport(data$data,file=fastQCfile)
arrayQualityMetrics(expressionset = data$eset, outdir = paste(arrayQMfile,”RMA”,sep=”_”),force=T)
arrayQualityMetrics(expressionset = data$data, outdir = paste(arrayQMfile,”Raw”,sep=”_”),force=T,do.logtransform=T)
eset.e<-exprs(data$eset)
d<-dist(t(eset.e))
hc<-hclust(d,method=”complete”)
pdf(“hclust.complete.pdf”)
plot(hc)
dev.off()
cv.y<-numeric()
y.sd<-apply(eset.e,1,sd)
y.mean<-apply(eset.e,1,mean)
cv.y<-y.sd/y.mean
x<-eset.e
y2<-cbind(x,cv.y)
y2<-y2[y2[,ncol(y2)]>0.10,1:(ncol(y2)-1)]
d<-dist(t(y2))
hc<-hclust(d,”ave”)
pdf(“hclust.ave.pdf”)
plot(hc)
dev.off()
}
getDesign<-function(data){
targets<-data$targets
g<-factor(targets[,2])
design<-model.matrix(~-1+g)
if((ncol(targets)>2)){
fo<-“~-1+g”
for(i in 3:ncol(targets)){
assign(paste(“v”,colnames(targets)[i],sep=””),factor(targets[,i]))
fo<-paste(fo,paste(“v”,colnames(targets)[i],sep=””),sep=”+”)
}
design<-model.matrix(as.formula(fo))
colnames(design)<-gsub(“^v”,””,colnames(design))
}
colnames(design)<-gsub(“^g”,””,colnames(design))
colnames(design)<-gsub(“-“,”_”,colnames(design))
colnames(design)<-make.names(colnames(design))
design
}
getContrastMatrix<-function(data,design,constr=NULL){
require(limma,quietly=TRUE)
if(is.null(constr)){
g<-factor(data$targets[,2])
design.col.names<-levels(g)
if(length(design.col.names)>2){
for(i in 1:(length(design.col.names)-1)){
for(j in (i+1):length(design.col.names)){
constr<-c(constr,paste(design.col.names[i],design.col.names[j],sep=”-“))
}
}
}else{
constr<-paste(design.col.names[2],design.col.names[1],sep=”-“)
}
}
contrast.matrix<-makeContrasts(contrasts=constr,levels=design)
contrast.matrix
}
doAnalyze<-function(data,design,contrast.matrix){
require(limma,quietly=TRUE)
fit<-lmFit(data$eset,design)
fit1<-contrasts.fit(fit,contrast.matrix)
fit2<-eBayes(fit1)
fit2
}
outputData<-function(data,fit2,contrast.matrix,times=2,cut.p.val=0.01,cut.fdr.val=NULL,outputFolder=”output”,all=FALSE,scale=”row”,cluster_rows=TRUE,cluster_cols=TRUE){
require(limma,quietly=TRUE)
require(pheatmap,quietly=TRUE)
dir.create(outputFolder)
正文完

 0