I'm running a piece of freely available Python code used to detect CNVs in single-cell sequencing data:
#!/usr/bin/env python
"""Count reads per genomic bin and write per-bin counts plus summary stats.

Usage: <script> INFILE OUTFILE STATFILE

This is Python 2 code (it relies on ``dict.has_key`` and ``long``).
"""

import sys


def main():
    """Assign each input read to a bin and write counts, ratios and stats.

    Command-line arguments:
        sys.argv[1]: tab-separated input file; column index 2 is used as the
            chromosome name and column index 3 as the position on it
            (presumably SAM records -- confirm against the input).
        sys.argv[2]: output file; one line per bin holding the bin's first
            three columns, its read count, and the count divided by the mean
            count per bin.
        sys.argv[3]: stats file; a header line followed by the number of
            reads counted and the middle element of the sorted bin counts.
    """
    infilename = sys.argv[1]
    outfilename = sys.argv[2]
    statfilename = sys.argv[3]

    # NOTE: as written, these two names are plain (path, 0) tuples rather than
    # tables loaded with filetodictionary() / filetoarray().  The
    # chrominfo.has_key(...) call below therefore raises the AttributeError
    # shown in the traceback that follows this listing.
    chrominfo = ("/path/hg19.chrom.sizes.txt", 0)
    bins = ("/path/hg19.bin.boundaries.50k.bowtie.k50.sorted.txt", 0)
    infile = open(infilename, "r")
    outfile = open(outfilename, "w")
    statfile = open(statfilename, "w")

    # One counter per bin, all starting at zero.
    bincounts = []
    for i in range(len(bins)):
        bincounts.append(0)

    print(len(bincounts))
    print(len(bins))

    counter = 0
    totalreads = 0
    prevchrompos = ""
    for x in infile:
        arow = x.rstrip().split("\t")
        thischrom = arow[2]
        thischrompos = arow[3]

        # Skip reads whose chromosome name contains "_", equals "chrm",
        # is empty, or is missing from the chromosome table.
        if thischrom.find("_") > -1:
            #print thischrom
            continue
        if thischrom == "chrm":
            #print thischrom
            continue
        if thischrom == "":
            continue
        if chrominfo.has_key(thischrom):
            pass
        else:
            continue

        totalreads += 1

        # Absolute position = position on the chromosome plus the value in
        # column 2 of that chromosome's table row (presumably the
        # chromosome's genome-wide start offset -- confirm against the table).
        thischrominfo = chrominfo[thischrom]
        thisabspos = long(thischrompos) + long(thischrominfo[2])

        counter += 1

        # Binary search over the bins, comparing against column 2 of each
        # bin row; stops once the bracket [indexdown, indexup] is narrower
        # than two entries.
        indexup = len(bins) - 1
        indexdown = 0
        indexmid = int((indexup - indexdown) / 2.0)

        while True:
            if thisabspos >= long(bins[indexmid][2]):
                indexdown = indexmid + 0
                indexmid = int((indexup - indexdown) / 2.0) + indexmid
            else:
                indexup = indexmid + 0
                indexmid = int((indexup - indexdown) / 2.0) + indexdown

            if indexup - indexdown < 2:
                break

        bincounts[indexdown] += 1
        prevchrompos = thischrompos

    for i in range(len(bincounts)):
        # Ratio of this bin's count to the mean count per bin.
        thisratio = float(bincounts[i]) / (float(counter) / float(len(bins)))
        outfile.write("\t".join(bins[i][0:3]))
        outfile.write("\t")
        outfile.write(str(bincounts[i]))
        outfile.write("\t")
        outfile.write(str(thisratio))
        outfile.write("\n")

    # Sorting in place is safe here: the per-bin lines are already written.
    bincounts.sort()

    statfile.write("totalreads\tmedianbincount\n")
    statfile.write(str(totalreads))
    statfile.write("\t")
    statfile.write(str(bincounts[len(bins)/2]))
    statfile.write("\n")

    infile.close()
    outfile.close()
    statfile.close()


def filetodictionary(inputfile, indexcolumn):
    """Load a tab-separated file into a dict keyed by one of its columns.

    Args:
        inputfile: path of the file to read.
        indexcolumn: zero-based index of the column used as the key.

    Returns:
        A dict mapping each key to the full list of fields of the first row
        that carried it.  Later rows with the same key are reported on
        stdout and otherwise ignored.
    """
    input = open(inputfile, "r")
    rd = dict()
    # input.readline()
    for x in input:
        arow = x.rstrip().split("\t")
        id = arow[indexcolumn]
        if rd.has_key(id):
            #rd[id].append(arow)
            print("duplicate knowngene id = " + id)
            print("arow = " + str(arow))
            print("rd[id] = " + str(rd[id]))
        else:
            rd[id] = arow
    input.close()
    return(rd)


def filetoarray(inputfile, skipfirst):
    """Load a tab-separated file into a list of rows.

    Args:
        inputfile: path of the file to read.
        skipfirst: number of leading lines to discard before collecting rows.

    Returns:
        A list with one entry per remaining line, each entry being the list
        of that line's tab-separated fields (trailing whitespace stripped).
    """
    input = open(inputfile, "r")
    ra = []
    for i in range(skipfirst):
        input.readline()
    for x in input:
        arow = x.rstrip().split("\t")
        ra.append(arow)
    input.close()
    return(ra)


if __name__ == "__main__":
    main()

# i'm getting error on line 40:
Traceback (most recent call last):
  File "/path/varbin.50k.sam.py", line 129, in <module>
    main()
  File "/path/varbin.50k.sam.py", line 40, in main
    if chrominfo.has_key(thischrom):
AttributeError: 'tuple' object has no attribute 'has_key'

I don't work regularly in Python; can anyone offer a suggestion? Where do I begin?
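Your code is expecting a dictionary but is getting a tuple. I think you've missed a step: you need to change
chrominfo = ("/path/hg19.chrom.sizes.txt", 0)
to
chrominfo = filetodictionary("/path/hg19.chrom.sizes.txt", 0)
The bins assignment on the next line of the script is also a plain tuple and likely needs the same treatment, using filetoarray(...) instead.
Note that `if dict.has_key(key)` has been deprecated in favour of `if key in dict.keys()` (or simply `if key in dict`).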
Comments
Post a Comment