"""Create the Glue crawler, or update its configuration if it already exists.

Running this module issues ``create_crawler`` with ``CRAWLER_CONFIG``. When
Glue reports that a crawler with that name already exists, the same
configuration is applied through ``update_crawler`` instead. Failures of
either call are logged rather than raised, so the script is best-effort.
"""
import boto3

from arns import CRAWLER_IAM_ROLE_ARN, QUEUE_ARN
from logger import logger
from vars import BUCKET, CRAWLER, DB, FOLDER

# S3 location the crawler targets: a single folder inside the bucket.
S3_PATH = f"s3://{BUCKET}/{FOLDER}"

glue = boto3.client("glue", region_name="us-east-1")

# Keyword arguments shared by create_crawler and update_crawler, so both
# calls always apply exactly the same settings. ``Name`` is passed separately.
CRAWLER_CONFIG = {
    "Role": CRAWLER_IAM_ROLE_ARN,
    "DatabaseName": DB,
    "Description": "Crawler for inventory data triggered by SQS events",
    "Targets": {
        "S3Targets": [
            {
                "Path": S3_PATH,
                # Queue paired with the S3 target; used together with the
                # CRAWL_EVENT_MODE recrawl behavior below.
                "EventQueueArn": QUEUE_ARN,
            }
        ]
    },
    "SchemaChangePolicy": {
        "UpdateBehavior": "UPDATE_IN_DATABASE",
        "DeleteBehavior": "DELETE_FROM_DATABASE",
    },
    "RecrawlPolicy": {"RecrawlBehavior": "CRAWL_EVENT_MODE"},
}

try:
    glue.create_crawler(Name=CRAWLER, **CRAWLER_CONFIG)
except glue.exceptions.AlreadyExistsException:
    # An exception raised inside an ``except`` handler is NOT caught by the
    # sibling ``except Exception`` clause of the same ``try``, so the update
    # call needs its own handler to be logged like a create failure is.
    try:
        glue.update_crawler(Name=CRAWLER, **CRAWLER_CONFIG)
    except Exception as e:
        logger.error(f"Error updating crawler: {e}")
    else:
        logger.info("Crawler already exists. Updated configuration successfully.")
except Exception as e:
    logger.error(f"Error creating crawler: {e}")
else:
    # Reached only when create_crawler returned without raising.
    logger.info("Crawler created successfully.")