-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgitlawca.py
More file actions
110 lines (91 loc) · 3.24 KB
/
gitlawca.py
File metadata and controls
110 lines (91 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from __future__ import unicode_literals
import sys
import getopt
from gitlawca.database import reset_database
from gitlawca.scrape import run as run_scraper
from gitlawca.downloader import start as run_downloader
from gitlawca.gitlawhub import reset_git_repo
from gitlawca.law_parser import parse_raw_document, reformat_document, reformatted_to_markdown
from gitlawca.config import config
from git import Repo
#pylint: disable=R0912
def usage():
    """Print the command line help text to standard output.

    Each entry is printed on its own line. ``print`` is called with a single
    parenthesised argument so the function produces the same output whether
    it is interpreted as a Python 2 print statement or a Python 3 function
    call.
    """
    help_lines = (
        'gitlawca command line usage',
        '',
        'python gitlawca.py (-h | --help) | --reset=level | --nuke | --scrape | --download | --test-parser=language | --test-markdown=language',
        '',
        '-h | --help: Print this message',
        '--reset=level: --reset=database: reset Acts database, --reset=github: reset github repository, --reset=all: reset both',
        '--nuke: Same as --reset=all',
        '--scrape: Start scraping federal government Justice department web site for consolidated acts',
        '--download: Start downloading and committing to Github consolidated acts in need of downloading',
        '--test-parser=language: language=eng or fra. Loads the appropriate test fixture and saves to test.html',
        '--test-markdown=language: language=eng or fra. Loads the appropriate test fixture and saves to test.md',
    )
    for line in help_lines:
        print(line)
def reset(arg):
    """Reset the Acts database, the git repository, or both.

    arg: reset level. 'database' calls reset_database(), 'github' calls
        reset_git_repo() on the repository in the configured download
        folder, and 'all' does both (database first).

    Any other value prints the usage text and exits with status 2, the
    conventional status for command line errors.
    """
    wants_database = arg in ('database', 'all')
    wants_github = arg in ('github', 'all')

    if not (wants_database or wants_github):
        usage()
        sys.exit(2)

    if wants_github:
        # Open the repository only when it is going to be reset, so that
        # --reset=database (or an invalid level) does not depend on the
        # download folder or the github configuration. For 'all' this still
        # happens before the database is touched, so a failure here leaves
        # the database untouched.
        repo = Repo(config('download')['folder'])
        root_sha = config('github')['root_sha']

    if wants_database:
        reset_database()

    if wants_github:
        reset_git_repo(repo, root_sha)
def scrape():
    """Run the scraper; this is the handler for the --scrape option."""
    run_scraper()
def download():
    """Run the downloader; this is the handler for the --download option."""
    run_downloader()
def test_parser(language):
    """Parse and reformat the C-41.5 test fixture and save it to test.html.

    language: language code substituted into the fixture file name
        (tests/fixtures/C-41.5-<language>.html).
    """
    fixture_path = 'tests/fixtures/C-41.5-{}.html'.format(language)
    with open(fixture_path) as fixture:
        raw_html = fixture.read()

    # Render to a string before opening the output file, so a parsing
    # failure never truncates an existing test.html.
    rendered = str(reformat_document(parse_raw_document(raw_html)))

    with open('test.html', 'w') as output:
        output.write(rendered)
def test_markdown(language):
    """Convert the C-41.5 test fixture to markdown and save it to test.md.

    language: language code substituted into the fixture file name
        (tests/fixtures/C-41.5-<language>.html).

    The fixture is parsed, reformatted, converted to markdown and written
    out UTF-8 encoded.
    """
    fixture_path = 'tests/fixtures/C-41.5-{}.html'.format(language)
    with open(fixture_path) as fixture:
        raw_html = fixture.read()

    reformatted = reformat_document(parse_raw_document(raw_html))
    markdown = reformatted_to_markdown(reformatted)

    with open('test.md', 'w') as output:
        output.write(markdown.encode('utf8'))
def main(argv):
    """Parse the command line and run the requested actions in order.

    argv: list of command line arguments, without the program name.

    Exit behaviour:
      * -h / --help prints the usage text and exits with status 0.
      * Unrecognised options, a missing option argument, no options at all,
        or a language other than 'eng' / 'fra' print the usage text and exit
        with status 2, so calling scripts can detect the failure.
    """
    # 'help' has to be registered here; otherwise --help is rejected by
    # getopt as an unknown option and never reaches the help branch below.
    long_options = [
        'help',
        'reset=',
        'nuke',
        'scrape',
        'download',
        'test-parser=',
        'test-markdown=',
    ]

    try:
        opts, _ = getopt.getopt(argv, 'h', long_options)
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    if not opts:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit()
        elif opt == '--reset':
            reset(arg)
        elif opt == '--nuke':
            reset('all')
        elif opt == '--scrape':
            scrape()
        elif opt == '--download':
            download()
        elif opt in ('--test-parser', '--test-markdown'):
            if arg not in ('eng', 'fra'):
                # Stop here rather than carrying on with the remaining
                # options and reporting success.
                usage()
                sys.exit(2)
            if opt == '--test-parser':
                test_parser(arg)
            else:
                test_markdown(arg)
        else:
            # getopt only returns registered options, so this is purely
            # defensive.
            usage()
            sys.exit(2)
# Script entry point: hand everything after the program name to main().
if __name__ == '__main__':
    main(sys.argv[1:])