Skip to content

Commit 8d73b4a

Browse files
cutting by length added
modified: fatool/fa.py modified: fatool/tests/test_fa.py
1 parent 3f0ed87 commit 8d73b4a

2 files changed

Lines changed: 29 additions & 19 deletions

File tree

fatool/fa.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# -*- coding: utf-8 -*-
22

33

4-
import re
4+
import re #gex as re
55
import math
66
from fatool import Sequence
77
import logging
@@ -51,11 +51,10 @@ def load_from_file(file):
5151

5252
@staticmethod
def load_content(content):
    '''
    Split FASTA-formatted text into a list of Sequence objects.

    A record starts at every line beginning with '>' and runs up to the
    next such line or the end of content; anything before the first
    header line is ignored.

    content -- string with the whole FASTA text
    returns -- list of Sequence(header, body); header is the first line
               of the record (leading '>' kept, trailing whitespace
               stripped), body is everything after that line with
               trailing whitespace stripped (inner newlines are kept)
    '''
    # The pattern is a zero-width lookahead, so findall yields one
    # (record_text, terminator) tuple per header line.
    records = re.findall(r'(?=(^>[\S\s]+?)(^>|\Z))', content, re.M)
    contigs_list = []
    for record, _terminator in records:
        # partition() gives an empty body for a header-only record;
        # stripping the header with a regex left the header itself
        # as the sequence in that case.
        header, _newline, body = record.rstrip().partition('\n')
        contigs_list.append(Sequence(header.rstrip(), body))
    return contigs_list
6059

6160
def write(self, fafile):
@@ -127,6 +126,14 @@ def remove(self, contigs_name_list):
127126
if not r.name in contigs_name_list:
128127
new_contig_list.append(r)
129128
return Fa(new_contig_list, 'rem_'+self.name)
129+
130+
def cut_min_len(self, min_len):
    '''
    Return a new Fa with only the contigs longer than min_len.

    min_len -- length threshold; the comparison is strict, so a contig
               whose length equals min_len is dropped
    returns -- Fa named 'cutof_' + str(min_len) + self.name
    '''
    long_enough = [contig for contig in self.contigs if len(contig) > min_len]
    label = 'cutof_' + str(min_len) + self.name
    return Fa(long_enough, label)
136+
130137

131138
def validate(self):
132139
'''

fatool/tests/test_fa.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@ def test_setUpFa(self):
2424
f = Fa(cl, 'test-fa')
2525
self.assertEqual(cl, f.contigs)
2626
self.assertEqual('test-fa', f.name)
27-
self.assertEqual({'name':0, 'name2':1, 'name3':2}, f.contigs_idx)
27+
self.assertEqual({'>name':0, '>name2':1, '>name3':2}, f.contigs_idx)
2828
cl.append('something')
2929
with self.assertRaises(TypeError):
3030
Fa(cl, 'name4')
31-
'''
31+
3232
def test_str(self):
3333
cl = []
3434
cl.append(Sequence('>name', 'ACTGactg'))
@@ -69,29 +69,32 @@ def test_add_contigs(self):
6969
def test_show_names(self):
    # show_names() is expected to report contig names with their leading '>'.
    contigs = [
        Sequence('>name', 'ACTGactg'),
        Sequence('>name2', 'NNNNNNNNNACTGNNNN'),
        Sequence('>name3', 'CTNACtacgatNNNNNNN'),
    ]
    fa = Fa(contigs, 'test-fa')
    self.assertEqual(['>name', '>name2', '>name3'], fa.show_names())

    # Adding a contig whose name already exists: '>name2' is expected
    # to end up last in the listing.
    fa.add_contig(Sequence('>name2', 'ACTGaaaaaaa'), 1)
    self.assertEqual(['>name', '>name3', '>name2'], fa.show_names())

    # Adding a contig with a new name: expected to be appended.
    fa.add_contig(Sequence('>name7', 'ACTGaaaaaaa'), 1)
    self.assertEqual(['>name', '>name3', '>name2', '>name7'], fa.show_names())
7777

7878
def test_extract(self):
    # extract() is expected to return a new Fa holding only the
    # contigs whose names (with leading '>') were requested.
    cl = [Sequence('>name', 'ACTGactg'), Sequence('>name2', 'NNNNNNNNNACTGNNNN'), Sequence('>name3', 'CTNACtacgatNNNNNNN')]
    f = Fa(cl, 'test-fa')
    self.assertEqual(cl, f.contigs)
    cl2 = [Sequence('>name2', 'NNNNNNNNNACTGNNNN'), Sequence('>name3', 'CTNACtacgatNNNNNNN')]
    self.assertEqual(cl2, f.extract(['>name2', '>name3']).contigs)
    # NOTE(review): remove() names its result 'rem_' + self.name with no
    # '>' prefix -- confirm extract() really yields '>extr_test-fa'.
    self.assertEqual('>extr_test-fa', f.extract(['>name2', '>name3']).name)
    # A requested name that is not in the file is expected to be ignored.
    self.assertEqual(cl2, f.extract(['>name2', '>name3', '>name321']).contigs)
8689

8790

8891
def test_remove(self):
    # remove() is expected to return a new Fa without the named contigs;
    # names are given with their leading '>'.
    first = Sequence('>name', 'ACTGactg')
    second = Sequence('>name2', 'NNNNNNNNNACTGNNNN')
    third = Sequence('>name3', 'CTNACtacgatNNNNNNN')
    f = Fa([first, second, third], 'test-fa')

    self.assertEqual(
        [Sequence('>name2', 'NNNNNNNNNACTGNNNN'), Sequence('>name3', 'CTNACtacgatNNNNNNN')],
        f.remove(['>name']).contigs)
    self.assertEqual(
        [Sequence('>name', 'ACTGactg')],
        f.remove(['>name2', '>name3']).contigs)
    # A name that is not present is expected to be ignored.
    self.assertEqual(
        [Sequence('>name', 'ACTGactg')],
        f.remove(['>name2', '>name3', '>name234']).contigs)
    # Same call as the first one: f itself is expected to be unchanged
    # by the earlier remove() calls.
    self.assertEqual(
        [Sequence('>name2', 'NNNNNNNNNACTGNNNN'), Sequence('>name3', 'CTNACtacgatNNNNNNN')],
        f.remove(['>name']).contigs)
9598

9699
def test_statistics(self):
97100
cl = [Sequence('>name', 'ACTGactg'), Sequence('>name2', 'NNNNNNNNNACTGNNNN'), Sequence('>name3', 'CTNACtacgatNNNNNNN'), Sequence('>name4', 'CTNAC')]
@@ -187,7 +190,7 @@ def tearDown(self):
187190
os.remove('f2.fa')
188191
os.remove('test.fa')
189192
pass
190-
'''
193+
191194
def test_conv_to_fq(self):
192195
cl = []
193196
test = 'ATGGAATCGGCTTTTAATACTGCAGGGGCGTTAAGTTGGCATGAACTCACAACCAATAATACCGAAGAGGCCATGCGCTTCTATGCTGAGATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGGGATCAGCATTGGCGGAATTACCGACAGTTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGGATCAAGTGGCTATCAGTGCTAAAAAACTCGGCGGTGACATTCTGTTTGGCCCTGAAGACATTCCAGAGGTGGGCCGTTTTTGTTGGATAAAAGACCCACAGGGCGCCATTATTGCGGCCATTAGCTATTTAAAACGTTGATGTAA'
@@ -198,4 +201,4 @@ def test_conv_to_fq(self):
198201
print fq
199202

200203
if __name__ == "__main__":
201-
unittest.main()
204+
unittest.main()

0 commit comments

Comments
 (0)