@@ -32,7 +32,7 @@ def format_version_string(version_number):
3232
3333class DiffbotJob (DiffbotClient ):
3434 """
35- Various calls for managing a Diffbot Crawlbot or Bulk API job.
35+ Various calls for managing a Crawlbot or Bulk API job.
3636 """
3737
3838 def request (self ,params ):
@@ -51,6 +51,12 @@ def status(self):
5151 response = self .request (self .params )
5252 return response
5353
def update(self, **kwargs):
    """
    Sends the job's parameters together with the given keyword arguments.

    The keyword arguments are layered over a copy of self.params, with
    kwargs winning on key clashes, and the merged dict is passed to
    self.request. self.params itself is not modified.

    Returns whatever self.request returns.
    """
    # Copy before merging: assigning self.params directly would alias the
    # instance dict, so each update would be stored on the instance and
    # sent again by every later call that passes self.params.
    merged_params = dict(self.params)
    merged_params.update(kwargs)
    return self.request(merged_params)
59+
5460 def delete (self ):
5561 temp_params = self .params
5662 temp_params ['delete' ] = 1
@@ -63,19 +69,36 @@ def restart(self):
6369 response = self .request (temp_params )
6470 return response
6571
def download(self, data_format="json"):
    """
    Downloads the stored output of a crawl or bulk job.

    The download URL is assembled from self.base_url, self.jobType and the
    'token' and 'name' entries of self.params; data_format is used as the
    file extension. raise_for_status() is called on the response before
    anything is returned.

    Returns the raw response content when data_format is "csv"; for any
    other value the response body is decoded with .json().
    """
    url_fields = (
        self.base_url,
        self.jobType,
        self.params['token'],
        self.params['name'],
        data_format,
    )
    target = '{}/v3/{}/download/{}-{}_data.{}'.format(*url_fields)

    reply = requests.get(target)
    reply.raise_for_status()
    return reply.content if data_format == "csv" else reply.json()
86+
class DiffbotCrawl(DiffbotJob):
    """
    Initializes a Diffbot crawl. Pass additional arguments as necessary.
    """

    def __init__(self, token, name, seeds=None, api=None, apiVersion=3, **kwargs):
        """
        Stores the token and name on the instance and starts the job.

        seeds and api are optional: 'seeds' is sent only when seeds is
        truthy, and 'apiUrl' (built with self.compose_url(api, apiVersion))
        only when api is truthy. Any extra keyword arguments are merged
        into the start parameters last, then self.start is called.
        """
        self.params = {"token": token, "name": name}

        # Work on a copy so the start-only settings are not kept in
        # self.params.
        launch_args = dict(self.params)
        if seeds:
            launch_args['seeds'] = seeds
        if api:
            launch_args['apiUrl'] = self.compose_url(api, apiVersion)
        launch_args.update(kwargs)

        self.jobType = "crawl"
        self.start(launch_args)
0 commit comments