-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmerge-quant-tables.py
More file actions
executable file
·96 lines (75 loc) · 2.15 KB
/
merge-quant-tables.py
File metadata and controls
executable file
·96 lines (75 loc) · 2.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env python2
import os
import sys
import gzip
from optparse import OptionParser
###############################################################################
# Usage text handed to optparse; it is shown for --help and on usage errors.
USAGE = """
python merge-quant-tables.py --out <output table for merged results>
--in < set of inputs to merge>
Assumes --in files are the output of quantify-clusters.py
"""

# Command-line interface.  --in captures a single value; anything left over
# on the command line is returned in `args`.
parser = OptionParser(USAGE)
parser.add_option('--out', dest='outFile',
                  help='output file for merged results')
parser.add_option('--in', dest='inFiles',
                  help='files to merge')
options, args = parser.parse_args()

# Both options are mandatory.  They are checked in order, and parser.error()
# reports the problem and terminates the script on the first one missing.
for _value, _complaint in (
    (options.outFile, 'outFile not given'),
    (options.inFiles, 'inFiles not given'),
):
    if _value is None:
        parser.error(_complaint)
###############################################################################
def _read_quant_rows(path):
    """Return the data rows of one input table as lists of string fields.

    Lines whose first character is '#' are treated as comments, and blank
    lines are skipped.  Every returned row has at least three
    whitespace-separated fields: rank, zip code and frequency.

    Raises:
        ValueError: if a data line has fewer than three fields.
    """
    rows = []
    with open(path, 'r') as handle:
        for line_number, raw in enumerate(handle, 1):
            if raw.startswith('#'):
                continue
            fields = raw.split()
            if not fields:
                # A blank line carries no data; indexing it would crash.
                continue
            if len(fields) < 3:
                raise ValueError(
                    'expected at least 3 columns\n%s\nline %i\n%s'
                    % (path, line_number, fields))
            rows.append(fields)
    return rows


def merge_quant_tables(in_paths, out_path):
    """Merge per-sample tables into a single tab-separated table.

    The first input fixes the output rows: its first two fields (original
    rank and zip code) are copied in file order.  Every input, including the
    first, contributes one column holding the third field of each row.  The
    column is named after the file's base name up to its first '.'.

    Args:
        in_paths: sequence of input table paths; the first defines the rows.
        out_path: path of the merged table to write.

    Raises:
        ValueError: if no inputs are given, two inputs map to the same
            column name, an input contains a zip code that is not in the
            first input, or an input lacks a zip code that is in the first
            input.
    """
    if not in_paths:
        raise ValueError('no input files given')

    # First pass over the first file only: it defines the row order.
    zip_order = [(fields[0], fields[1])
                 for fields in _read_quant_rows(in_paths[0])]
    known_zips = set(code for _rank, code in zip_order)
    print('Found %i zips' % len(zip_order))

    names = []
    counts = {}
    for path in in_paths:
        print(path)
        name = path.split('/')[-1].split('.')[0]
        print('name is %s' % name)
        if name in names:
            # Counts are keyed by (zip, name); a repeated name would silently
            # overwrite the earlier file's column.
            raise ValueError('duplicate sample name\n%s\n%s' % (name, path))
        names.append(name)

        for fields in _read_quant_rows(path):
            code = fields[1]
            if code not in known_zips:
                raise ValueError('found a zip not in set\n%s\n%s\n%s'
                                 % (name, path, fields))
            counts[(code, name)] = fields[2]

        # Validate completeness now, before the output file is opened, so a
        # bad input never leaves a truncated table behind.
        for _rank, code in zip_order:
            if (code, name) not in counts:
                raise ValueError('zip missing from input\n%s\n%s\n%s'
                                 % (name, path, code))
    print('Read in all!')

    with open(out_path, 'w') as out_handle:
        header = ['#originalRank', 'zipCode'] + names
        out_handle.write('\t'.join(header) + '\n')
        for rank, code in zip_order:
            cells = [rank, code] + [counts[(code, name)] for name in names]
            out_handle.write('\t'.join(cells) + '\n')


if __name__ == '__main__':
    # --in supplies the first input; the remaining inputs are positional.
    toDo = [options.inFiles] + list(args)
    print('have %i to merge' % len(toDo))
    try:
        merge_quant_tables(toDo, options.outFile)
    except ValueError as err:
        print('ERROR! %s' % err)
        # Exit non-zero so calling shells and pipelines see the failure.
        sys.exit(1)