-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenericparser.py
More file actions
37 lines (31 loc) · 798 Bytes
/
genericparser.py
File metadata and controls
37 lines (31 loc) · 798 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from bs4 import BeautifulSoup
import urllib
import re
import os
import sys, getopt
# Name shown at the start of the usage line.
scriptname = "genericparser.py"
# One-line usage summary that main() prints for -h and for rejected options.
usagetext = "".join((scriptname, " -u <urltoparse> -p <parseparameterfile>"))
def main(argv):
    """Parse the command-line options and echo the values that were supplied.

    Recognised options:
        -h             print the usage text and exit
        -u, --url      URL to parse
        -p, --pfile    parse parameter file

    Args:
        argv: Argument list without the program name (e.g. ``sys.argv[1:]``).

    Raises:
        SystemExit: with status 2 when getopt rejects the arguments, or via a
            plain ``sys.exit()`` after ``-h`` has printed the usage text.
    """
    url = ''
    param_file = ''
    try:
        # Positional leftovers are accepted by getopt but not used here.
        opts, _positional = getopt.getopt(argv, "hu:p:", ["url=", "pfile="])
    except getopt.GetoptError:
        print(usagetext)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(usagetext)
            sys.exit()
        elif opt in ("-u", "--url"):
            url = arg
        elif opt in ("-p", "--pfile"):
            param_file = arg

    # A single %-formatted argument prints the same text whether `print` is
    # the Python 2 statement or the Python 3 function, and lets the value be
    # wrapped in a matching pair of quotes.
    print('Url is "%s"' % url)
    print('Parse parameter file is "%s"' % param_file)
# Script entry point: the guard is only true when this file is executed
# directly, so importing the module does not trigger argument parsing.
if __name__ == '__main__':
    # Hand main() everything after the program name.
    main(sys.argv[1:])
# NOTE(review): the triple-quoted block below is a bare string expression, so
# the two statements inside it are never executed. They reference `modsurl`
# and `pagenum`, which are not defined anywhere in the visible code, and use
# the Python 2 spelling `urllib.urlopen`. Presumably a draft of the
# page-fetching step -- confirm before reviving or deleting it.
'''
modsurlwithpage = modsurl + str(pagenum)
soup = BeautifulSoup(urllib.urlopen(modsurlwithpage).read())
'''