-
Notifications
You must be signed in to change notification settings - Fork 2
/
testcollege.py
66 lines (45 loc) · 1.38 KB
/
testcollege.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import sys
import re
"""
This file is tested with test-page.html
"""
"""Key for searching:
College name:
    search for the marker 006699;
    the college name follows it.
    Stop at font>.
College address:
    start at the end of the college name + 5;
    the college location follows.
    Stop at <b.
College year of establishment:
    start at the end of the college address + 16;
    the year follows.
    Stop at the end of the college address + 20.
"""
# Delimiters used to locate each field in the raw page text.
NAME_MARKER = '006699'    # string that precedes every college name
NAME_END_TAG = 'font>'    # tail of the tag that closes the name
ADDRESS_END_TAG = '<b'    # start of the tag that follows the address


def parse_colleges(html, max_records=100, verbose=False):
    """Extract college records from the raw text of the listing page.

    The page is scanned left to right. Each record is located purely by
    fixed offsets around three delimiters:

    * name:    from ``NAME_MARKER`` position + 7 up to 2 characters before
               the next ``font>``
    * address: from the ``font>`` position + 6 up to 2 characters before
               the next ``<b``
    * year:    the 4 characters starting 17 characters after that ``<b``

    Args:
        html: Page contents as a string.
        max_records: Upper bound on the number of records returned
            (the original script processed at most 100).
        verbose: When true, print the raw name, address and year text of
            each record as it is found.

    Returns:
        A list of records. Each record is ``[name, address, year]`` with
        ``year`` as an int, or ``[name, address]`` when the year text is
        not a valid integer.
    """
    records = []
    pos = 0  # search cursor; avoids re-slicing the whole page per record
    while len(records) < max_records:
        name_start = html.find(NAME_MARKER, pos)
        if name_start == -1:
            break  # no more colleges on the page
        name_end = html.find(NAME_END_TAG, name_start)
        if name_end == -1:
            break  # truncated record: name is never closed
        addr_end = html.find(ADDRESS_END_TAG, name_end + 5)
        if addr_end == -1:
            break  # truncated record: nothing follows the address

        name = html[name_start + 7:name_end - 2]
        address = html[name_end + 6:addr_end - 2]
        year_text = html[addr_end + 17:addr_end + 21]
        if verbose:
            print(name)
            print(address)
            print(year_text)

        record = [name, address]
        try:
            record.append(int(year_text))
        except ValueError:
            # Best effort: a record without a numeric year keeps only
            # its name and address.
            pass
        records.append(record)

        # Resume at the end of this name so the next search finds the
        # following marker, not this one again.
        pos = name_end
    return records


def main(argv):
    """Read the HTML file named in ``argv[1]`` and print its college records.

    Prints the raw fields of every record while parsing, then each
    assembled record on its own line. Exits with a usage message when no
    file name is given.
    """
    if len(argv) < 2:
        sys.exit('usage: python testcollege.py <html-file>')
    # 'U' (universal newlines) is required on Python 2 but was removed in
    # Python 3.11, where text mode already translates newlines.
    mode = 'rU' if sys.version_info[0] < 3 else 'r'
    with open(argv[1], mode) as page:
        html = page.read()
    for record in parse_colleges(html, verbose=True):
        print(record)


if __name__ == '__main__':
    main(sys.argv)