Segment 13 and 16, Group 2

From Computational Statistics Course Wiki
Jump to navigation Jump to search

Group Members


import math
import numpy
import matplotlib.pyplot as plt
import scipy

def main():
    
    f = open('chrIV.txt', 'r')

    chromosome = f.read()
    length = len(chromosome)

    allPvals = []

    # Pvals for "forward" frames 
    allPvals.append(readFrame(chromosome))
    allPvals.append(readFrame(chromosome[1:length-2]))
    allPvals.append(readFrame(chromosome[2:length-1]))

    # Technically these are supposed to be "transcripted" as well
    reverseChromosome = chromosome[::-1]

    # Pvals for "backward" frames
    allPvals.append(readFrame(reverseChromosome))
    allPvals.append(readFrame(reverseChromosome[1:length-2]))
    allPvals.append(readFrame(reverseChromosome[2:length-1]))

    # Flatten
    allPvals = [y for x in allPvals for y in x]
    allPvals = numpy.array(allPvals)
    
    # Regular
    threshold = 0.05 #/ len(allPvals)
    numSignificant = numpy.where(allPvals < threshold)[0]
    print "Num significant:", len(numSignificant)

    # With Bonferroni correction
    threshold = 0.05 / len(allPvals)
    numSignificant = numpy.where(allPvals < threshold)[0]
    print "Bonferroni numsignificant:" , len(numSignificant)
    

def readFrame(chromosome):
    '''
    Calculate pvals for ORFs in the given strand, starting from the first base.
    '''
    stops = ['TAA', 'TAG', 'TGA']

    breakpoints = []

    for index in range(0,len(chromosome), 3):
        codon = chromosome[index:index+3]
        if codon in stops:
            breakpoints.append(index+2)

    orflengths = [];
    orflengths.append(breakpoints[0])
    for c in range(1, len(breakpoints)):
        orflengths.append(breakpoints[c] - breakpoints[c-1])
    
    pvals = []
    for o in orflengths:
        pvals.append(geometricPval(3.0/64, o))

    return pvals

# If you want to see the histogram of pvals, uncomment this
#plt.hist(pvals, bins=100)
#    plt.show()


def geometricPval(p, k):
    return math.pow(1-p, k)


def geometricPDF(n, p):
    return (math.pow(p, n-1) * (1 - p))



if __name__=="__main__":
    main()