-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparseProteinFunction_csv.py
More file actions
executable file
·37 lines (31 loc) · 1.03 KB
/
parseProteinFunction_csv.py
File metadata and controls
executable file
·37 lines (31 loc) · 1.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/python
# Jun Hoe, Lee (2013)
# parse protein sequences downloaded from Ensembl and output the proteinID and function in two columns
# usage: "python parseProteinFunction_csv.py [inputFile] [outputFile]"
import sys
import re
def main(argv=None):
    """Extract Ensembl protein IDs and descriptions from a CSV file.

    Reads the input file line by line, splits each line on commas, and for
    every line whose first field starts with "ENS" writes
    "<first field>\\t<second field>\\n" to the output file. A progress trace
    is echoed to stdout for every line read.

    Only the first two comma-separated fields are used; because the split is
    on every comma, a description that itself contains a comma is cut at that
    comma.

    Args:
        argv: Argument vector laid out like ``sys.argv`` (program name, input
            path, output path). Defaults to ``sys.argv``.

    Returns:
        0 on success, 1 when the input and/or output path is missing.
    """
    if argv is None:
        argv = sys.argv

    # getting input path
    try:
        input_path = argv[1]
        output_path = argv[2]
    except IndexError:
        # Only a too-short argument list is expected here; anything else
        # should surface as a real error instead of a usage message.
        print("No inputFile and/or outputFile provided")
        print("Usage: python parseProteinFunction.py [inputFile] [outputFile]")
        return 1
    print("inputFile %s" % input_path)
    print("outputFile %s" % output_path)

    # `with` closes both files even if a read or write fails part-way.
    with open(input_path, 'r') as in_file, open(output_path, 'w') as out_file:
        for line in in_file:
            print("line %s" % line)
            # Drop the line ending before splitting; otherwise a two-column
            # row keeps its "\n" in the description and every record is
            # followed by an empty line in the output.
            fields = line.rstrip('\r\n').split(',')
            if len(fields) < 2:
                # Blank or comma-less line: there is no description column.
                continue
            protein_id, description = fields[0], fields[1]  # e.g. "ENSP00000171887"
            print("characters[0] %s" % protein_id)
            print("characters[1] %s" % description)
            print("protID[:3] %s" % list(protein_id[:3]))
            if protein_id.startswith('ENS'):
                record = protein_id + "\t" + description + "\n"
                out_file.write(record)
                print("writeLine %s" % record)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))