-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpdfinfo.py
More file actions
91 lines (74 loc) · 2.5 KB
/
pdfinfo.py
File metadata and controls
91 lines (74 loc) · 2.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""
Retrieve document info from a PDF file.
Usage:
python pdfinfo.py --inpath "path/file"
Command line options:
--inpath Path and file name of input PDF file
Example: Retrieve and display document info for doc.pdf
python pdfinfo.py --inpath doc.pdf
"""
import argparse
import pypdf
import os
import pdftools_utils as pu
def parse_args():
    """
    Parse the command line and return the resulting argparse namespace.

    A single option is defined: -i/--inpath, the input PDF path, which
    is an empty string when the option is not given.
    """
    cli = argparse.ArgumentParser()
    inpath_option = {'help': 'Input path/file', 'type': str, 'default': ''}
    cli.add_argument('-i', '--inpath', **inpath_option)
    return cli.parse_args()
class PdfInfo:
    """
    Collect the page count and document-information entries of a PDF file.

    Typical use: call validate_inputs(inpath=...) and, when it returns
    True, call process(); the report text is then available in doc_info
    (or through get_doc_info()). When either step returns False,
    status() holds the reason.
    """

    def __init__(self):
        self.doc_info = ''  # report text filled in by process()
        self.msg = ''       # most recent status / error message
        self.args_d = {}    # keyword arguments captured by validate_inputs()

    def validate_inputs(self, **kwargs) -> bool:
        """
        Test for valid inputs and return status.

        Checks, in order, that 'inpath' names an existing file, that
        pu.ispdf() accepts it, and that pu.isRestricted() does not flag
        it. The outcome text is stored for status().

        Returns True when every check passes, otherwise False.
        """
        self.args_d = kwargs
        # A missing or None 'inpath' is reported like a file that cannot
        # be found instead of raising KeyError / TypeError.
        inpath = kwargs.get('inpath') or ''
        if not os.path.isfile(inpath):
            self.msg = 'Cannot find input file {0}'.format(inpath)
        elif not pu.ispdf(inpath):
            self.msg = '{0} does not look like a valid PDF.'.format(inpath)
        elif pu.isRestricted(inpath):
            self.msg = 'File is restricted:\n {0}'.format(inpath)
        else:
            self.msg = 'Inputs validated'
            return True
        return False

    def status(self) -> str:
        """Return the most recent status or error message."""
        return self.msg

    def get_doc_info(self) -> str:
        """Return the report text produced by the last successful process()."""
        return self.doc_info

    def process(self) -> bool:
        """
        Main processing core.

        Open the file given to validate_inputs() with pypdf and store a
        report in doc_info: a 'Pages: N' line followed by one
        'key = value' line per document-information entry.

        Returns True on success. On any failure returns False, leaves
        doc_info unchanged and stores the reason for status().
        """
        try:
            with open(self.args_d['inpath'], 'rb') as fr:
                reader = pypdf.PdfReader(fr)
                # NOTE(review): relies on decrypt() returning a falsy value
                # when the empty password is rejected - confirm against the
                # installed pypdf version.
                if reader.is_encrypted and not reader.decrypt(''):
                    self.msg = 'Cannot decrypt {0} with an empty password.'.format(
                        self.args_d['inpath'])
                    return False
                lines = ['Pages: {0}'.format(len(reader.pages))]
                # metadata can be None when the file carries no document
                # information; treat that as "no entries".
                info = reader.metadata or {}
                for key in info:
                    # key[1:] drops the first character of the key
                    # (presumably the leading '/' of a PDF name).
                    lines.append('{0} = {1}'.format(key[1:], info[key]))
        except Exception as exc:
            # Boundary handler: process() reports failure through its return
            # value, so record the cause instead of leaving a stale message.
            self.msg = 'Could not read document info from {0}: {1}'.format(
                self.args_d.get('inpath', ''), exc)
            return False
        self.doc_info = '\n'.join(lines)
        return True
if __name__ == "__main__":
    # Script entry point: validate the command-line inputs, then extract
    # the document info. process() only runs when validation succeeded.
    args = parse_args()
    P = PdfInfo()
    succeeded = P.validate_inputs(**vars(args)) and P.process()
    # Show the report on success, otherwise the status message.
    print(P.doc_info if succeeded else P.status())