-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmake-batch-dirs
More file actions
executable file
·210 lines (188 loc) · 7.55 KB
/
make-batch-dirs
File metadata and controls
executable file
·210 lines (188 loc) · 7.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#!/usr/bin/python3
import yaml
import argparse
import os
import pwd
import json
import sys
import subprocess
import shutil
import sheetutils
import logging
import openpyxl
import csv
import pandas as pd# for type hints
# Log record layout: timestamp with milliseconds, level name, then the message.
log_formatter = logging.Formatter(
    fmt='%(asctime)s.%(msecs)03d %(levelname)s %(message)s',
    datefmt="%Y%m%d %H:%M:%S",
)
def get_username() -> str:
    """Return the login name of the user running this script."""
    # Look up the password-database entry for the process's real user id.
    passwd_entry = pwd.getpwuid(os.getuid())
    return passwd_entry.pw_name
def setup_logger(name:str, log_file:str, level=logging.DEBUG):
    """
    Return the logger called `name`, set to `level`, with two handlers added.

    One handler writes to `log_file` using the module's log_formatter;
    the other writes to stdout and has no formatter set on it.
    """
    to_file = logging.FileHandler(log_file)
    to_file.setFormatter(log_formatter)
    to_console = logging.StreamHandler(sys.stdout)

    log = logging.getLogger(name)
    log.setLevel(level)
    for handler in (to_file, to_console):
        log.addHandler(handler)
    return log
def read_yaml_file(path: str) -> dict:
    """
    Load the yaml file at `path` and return the data it describes.

    Note: the loader builds whichever Python type fits the yaml best.
    That is normally a dict, but a list or a str is possible as well.
    """
    # FullLoader is not restricted to plain data the way SafeLoader is,
    # so the file is assumed to come from a trusted script user.
    with open(path, "r") as stream:
        loader = yaml.FullLoader(stream)
        return loader.get_data()
def str_to_bool(value: str) -> bool:
    """
    Convert a common yes/no style word to a boolean.

    Surrounding whitespace and letter case are ignored.
    error: raises ArgumentTypeError when the word is not one of the
    accepted spellings.
    """
    word = value.strip().lower()
    if word in {'true','t','yes','y','1'}:
        return True
    if word in {'false','f','no','n','0'}:
        return False
    raise argparse.ArgumentTypeError('Boolean value expected')
def create_batch_folder(scanning_path:str, batch_name:str):
    """
    Create the batch directory {scanning_path}/{batch_name}.

    Returns the path of the newly created directory.
    error: if the path already exists (as a directory or as any other
    kind of file), logs an error and exits with status 1. Other mkdir
    failures (missing parent, permissions, ...) are raised as OSError.
    """
    batch_path = os.path.sep.join([scanning_path,batch_name])
    logger.info(f"Creating Batch Path: {batch_path}")
    try:
        # Attempt the mkdir directly rather than testing isdir() first:
        # this also catches a regular file sitting at batch_path and
        # leaves no gap between the check and the creation.
        os.mkdir(batch_path)
    except FileExistsError:
        logger.error(f"Error: {batch_path} exists")
        sys.exit(1)
    return batch_path
def copy_xslx_to_batch(batch_path:str):
    """
    Copy the spreadsheet named by args.xls_file to
    {batch_path}/manifest.xlsx.

    error: if manifest.xlsx already exists in batch_path, logs an error
    and exits with status 1 instead of overwriting it.
    """
    manifest_path = os.path.sep.join([batch_path,'manifest.xlsx'])
    if os.path.isfile(manifest_path):
        logger.error(f"Error: {manifest_path} already exists.")
        sys.exit(1)
    logger.info(f"Copying spreadsheet to {manifest_path}")
    shutil.copyfile(args.xls_file, manifest_path)
def save_xslx_as_csv(batch_path:str):
    """
    Write the cell values of the workbook named by args.xls_file to
    {batch_path}/manifest.csv.

    Every row of every worksheet is written, in workbook order, into
    that single csv file.
    """
    logger.info("Creating spreadsheet as csv")
    wb = openpyxl.load_workbook(args.xls_file)
    manifest_csv = os.path.sep.join([batch_path,'manifest.csv'])
    # Open the output once, outside the sheet loop: reopening it in "w"
    # mode for each sheet truncated the file, so only the last sheet's
    # rows survived. newline='' is what the csv module asks for on files
    # handed to csv.writer, so it controls the line endings itself.
    with open(manifest_csv, "w", newline='') as f:
        writer = csv.writer(f)
        for sheet_name in wb.sheetnames:
            for row in wb[sheet_name].rows:
                writer.writerow([cell.value for cell in row])
def make_dirs_from_df(df:pd.DataFrame, batch_path):
    """
    Create one directory {batch_path}/{id} for every usable value in
    the dataframe's 'id' column.

    Missing values (None / NaN, e.g. blank spreadsheet cells), empty
    strings and the literal text "None" are skipped.
    error: logs and exits with status 1 if the dataframe has no 'id'
    column or no data rows. A failing mkdir is only logged as a
    warning; the remaining ids are still processed.
    """
    # Make sure the df has an 'id' column and data rows
    if 'id' not in df.columns:
        logger.error("Column 'id' does not exist")
        sys.exit(1)
    # Make sure the df has rows besides the header row.
    if len(df) == 0:
        logger.error("Sheet contains no data")
        sys.exit(1)
    for _, row in df.iterrows():
        raw_id = row.loc['id']
        # Blank cells arrive as NaN; str(NaN) is "nan", which would
        # otherwise produce a bogus directory of that name.
        if pd.isna(raw_id):
            continue
        object_id = str(raw_id)
        if not object_id or object_id == "None":
            continue
        logger.info(f"Creating {batch_path}/{object_id}")
        object_path = os.path.sep.join([batch_path,object_id])
        try:
            os.mkdir(object_path)
        except OSError as error:
            logger.warning(f"Warning: {object_path} - {error}.")
#
# Main function.
#
def main():
    """
    Create a batch folder holding one sub-folder per id in a manifest.

    Steps: parse the command line, read scanning_path from the yaml
    config file, set up logging, then read the manifest either from a
    Google Sheet (--use-google) or from the --xls-file spreadsheet and
    create {scanning_path}/{batch_name}/{id} for each id. In spreadsheet
    mode the xlsx file is also copied into the batch folder and saved
    there as csv.

    error: logs and exits with status 1 when the arguments needed for
    the selected mode are missing.
    """
    # args and logger are module globals because the helper functions
    # in this file read them directly.
    global args, logger
    args = _parse_args()

    # Set configuration variables from config-file parameter
    cfg = read_yaml_file(args.config_file)
    scanning_path = cfg['scanning_path']

    # Setup Log file.
    logger = setup_logger("log", args.log_file or "log.txt")
    logger.info("Log file created.")

    # Setup the batch_name. --batch-name is a required option, so the
    # file-name fallback only applies when it is passed as an empty string.
    if args.batch_name:
        batch_name = args.batch_name
    elif args.xls_file:
        batch_name = os.path.splitext(os.path.basename(args.xls_file))[0]
    else:
        logger.error("Error: --batch-name is empty and there is no --xls-file to derive it from.")
        sys.exit(1)
    logger.info(f"Batch name: {batch_name}")

    # If we are using google sheets...
    if args.use_google:
        if not args.google_sheet_creds:
            logger.error("Error: --google-creds-file is required.")
        elif not os.path.isfile(args.google_sheet_creds):
            logger.error(f"Error: credentials file {args.google_sheet_creds} does not exist.")
        if not args.google_sheet_id:
            logger.error("Error: --google-sheet-id is required.")
        if not args.google_sheet_name:
            logger.error("Error: --google-sheet-name is required.")
        google_args_ok = (
            args.google_sheet_creds
            and args.google_sheet_id
            and args.google_sheet_name
            and os.path.isfile(args.google_sheet_creds)
        )
        if not google_args_ok:
            logger.error("Error: Google arguments are required when using Google Sheets.")
            # Exit non-zero so callers can tell the run failed.
            sys.exit(1)
        logger.info(f"Using Google Sheet: {args.google_sheet_id},{args.google_sheet_name}")
        manager = sheetutils.GoogleSheetManager()
        manager.connect(args.google_sheet_creds)
        sheet = manager.sheet(args.google_sheet_id, args.google_sheet_name)
        df = sheet.read()
        batch_path = create_batch_folder(scanning_path, batch_name)
        make_dirs_from_df(df,batch_path)
    # Else we are using a Spreadsheet...
    else:
        if not args.xls_file:
            logger.error("Error: --xls-file is required unless --use-google is set.")
            sys.exit(1)
        logger.info(f"Using Spreadsheet: {args.xls_file}")
        logger.info(f"Creating Batch folder: {scanning_path}/{batch_name}")
        df = pd.read_excel(args.xls_file)
        batch_path = create_batch_folder(scanning_path, batch_name)
        make_dirs_from_df(df,batch_path)
        logger.info("Storing local copy of xlsx file and creating csv file from spreadsheet.")
        copy_xslx_to_batch(batch_path)
        save_xslx_as_csv(batch_path)
    logger.info("Batch Path Creation Complete.")


def _parse_args():
    """Define the command line options and parse them from sys.argv."""
    parser = argparse.ArgumentParser(description='Run..')
    parser.add_argument('--config-file', dest="config_file", required=True, help='Config file.')
    parser.add_argument('--xls-file', dest="xls_file", required=False, help='XLS file.')
    parser.add_argument('--batch-name', dest="batch_name", required=True, help='Name of the batch.')
    parser.add_argument('--log-file', dest="log_file", required=False, help='Log file.')
    parser.add_argument('--use-google', dest='use_google', type=str_to_bool, required=False, default=False, help='Use Google Sheet.')
    parser.add_argument('--google-sheet-id', dest='google_sheet_id', required=False)
    parser.add_argument('--google-sheet-name', dest='google_sheet_name', default="Sheet1", required=False)
    parser.add_argument('--google-creds-file', dest='google_sheet_creds', required=False)
    return parser.parse_args()
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()