Source code for fastAQ.trimming

# -*- coding: utf-8 -*-
#	Trimming a FASTQ file.
#	Author - Janu Verma
#	jv367@cornell.edu

import string

baseQdict = {x:ord(x) for x in string.printable}

[docs]class Trimming: """ Trimming a FASTQ sequence. Parameters ---------- sequence : The sequence to be trimmed. qualities : Base qualities of the bp's in the sequence. """ def __init__(self, sequence, qualities): self.seq = sequence self.qual = qualities
[docs] def lowQualTrim(self, qualCutOff): """ Trims a sequence by removing low quality bp's. Parameters ---------- qualCutOff : The threshold for the quality. The bases with quality below this threshold will be removed. Returns ------- Trimmed sequence. """ trimmedSeq = '' l = len(self.seq) if (l != len(self.qual)): print "Number of base pairs is not same as the number of quality values." return None else: for i in range(l): qScore = baseQdict[self.qual[i]] qScore = qScore - 33 if (qScore >= qualCutOff): trimmedSeq += self.seq[i] return trimmedSeq
[docs] def mott(self, limitValue): """ Trims a sequence by using Mott's algorithm. Parameters ---------- limitValue : limiting value. Returns ------ Trimmed sequence. """ seq = self.seq qual = self.qual l = len(seq) totalLMP = 0.0 oldLmp = 0.0 i = 0 trimmedSeq = '' while (i < l): qScore = baseQdict[qual[-i-1]] qScore -= 33 expo = float(qScore)/10 pError = 10**(-expo) lmpValue = limitValue - pError oldLmp = totalLMP totalLMP += lmpValue if (totalLMP <= 0.0): totalLMP = 0.0 i +=1 else: trimmedSeq = seq[-i-1] + trimmedSeq if (oldLmp > totalLMP): break i += 1 return trimmedSeq
[docs] def trimSequence(self, interval): """ Trims the sequence from both sides based on the interval. Parameters ---------- interval : The interval containing the number of bp's to be trimmed from left and right side respectively. Returns ------- Trimmed sequence. """ sequence = self.seq start,end = interval n = len(sequence) return sequence[start: -end]