-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcdse-deleted-inventory.sh
More file actions
143 lines (124 loc) · 5.4 KB
/
cdse-deleted-inventory.sh
File metadata and controls
143 lines (124 loc) · 5.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/bin/bash
export TZ=UTC
#######################################
# Print the command-line help to stderr and terminate the script.
# Globals:   odataUrl, fromDate, untilDate (read; interpolated into the text,
#            so they must be assigned before this function is called)
# Arguments: none
# Outputs:   help text on stderr; nothing on stdout, so the CSV stream stays
#            clean
# Returns:   never returns; exits with status 1
#
# NOTE(review): the filter examples use substringof(), which is OData v2/v3
# syntax - confirm the CDSE catalogue accepts it (OData v4 spells this
# contains(Name,'...')).
#######################################
function usage {
  cat >&2 <<EOF
This script can be used to retrieve a list of the deleted products from the CDSE OData catalogue.

USAGE:
$0 [-c|--condition=...] [-s|--select=Id,Name] [-f|--from=DATE] [-u|--until=DATE] [-i|--increment="1 day"] [-w|--wgetrc=FILE] [-b|--batchsize=50] [-l|--limit=100] [-o|--odataUrl=$odataUrl] [-q|--quiet]
 --condition is the OData query filter, examples for each sentinel mission:
 -c="Collection/Name eq 'SENTINEL-1' and not substringof('_RAW_',Name)"
 -c="Collection/Name eq 'SENTINEL-2' and substringof('_MSIL1C_',Name)"
 -c="Collection/Name eq 'SENTINEL-3' and (substringof('_OL_',Name) or substringof('_SL_',Name) or substringof('_SR_',Name))"
 -c="Collection/Name eq 'SENTINEL-5P' and substringof('_L2_',Name)"
 --select fields to retrieve and return in the CSV list, default is:
 Id,Name,ContentLength,Checksum,OriginDate,DeletionDate,DeletionCause
 --odataUrl of the data hub service (default is $odataUrl)
 --from date (default is yesterday: $fromDate)
 --until date (default is midnight: $untilDate)
 --increment "1 day"
 --wgetrc /path/to/.wgetrc (file with user=xxx and password=yyy of the OData service account)
 --batchsize for iterating over the result (default=50)
 --limit the amount of products to be retrieved (default=100)
 --quiet avoids progress output to stderr
Result is ordered by DeletionDate to ensure linear sequence
Output is sent to stdout in the CSV format with header.

Example to retrieve all deleted S2 products from yesterday:
 $0 -w=.cdserc -l=200000 -b=1000 -c="Collection/Name eq 'SENTINEL-2'"

EOF
  exit 1
}
# Built-in settings; every one of them can be overridden by a command-line
# option further down.
TZ=UTC

# Catalogue endpoint and the columns written to the CSV output.
odataUrl='https://catalogue.dataspace.copernicus.eu/odata/v1'
select='Id,Name,ContentLength,Checksum,OriginDate,DeletionDate,DeletionCause'

# No extra OData filter unless the caller supplies one.
condition=

# Time window: start of yesterday up to the last microsecond of today (UTC),
# walked in steps of one day.
printf -v fromDate '%sT00:00:00.000000Z' "$(date -u --date='1 day ago' +%Y-%m-%d)"
printf -v untilDate '%sT23:59:59.999999Z' "$(date -u +%Y-%m-%d)"
increment='1 day'

# Page size per request, overall product cap, and progress reporting switch.
batchsize=50
limit=100
quiet=false
#######################################
# Parse the command-line options into the global settings variables.
# Every value option is accepted as "--opt value" and as "--opt=value"
# (same for the short forms).
# Globals:   condition, select, odataUrl, fromDate, untilDate, increment,
#            batchsize, limit, quiet (written); WGETRC (written and exported)
# Arguments: the script's positional parameters
# Outputs:   error messages on stderr (stdout is reserved for the CSV data)
# Returns:   0 on success; on an unknown option or a missing value it calls
#            usage and exits non-zero
#
# NOTE(review): --wgetrc only exports WGETRC; the download code visible in
# this file uses curl, which does not read that variable - confirm whether
# the option still has any effect.
#######################################
parse_args() {
  local arg target
  while [ "$#" -gt 0 ]; do
    arg=$1

    # Rewrite "--opt=value" as "--opt value" so each option is handled in one
    # place. Flags that take no value are left alone and end up in the
    # unknown-option branch below.
    case "$arg" in
      -q=* | --quiet=* | -h=* | --help=*) ;;
      -*=*) set -- "${arg%%=*}" "${arg#*=}" "${@:2}" ;;
    esac

    # Map the option to the variable it sets.
    case "$1" in
      -q | --quiet) quiet=true; shift; continue ;;
      -h | --help) usage; exit 1 ;;
      -c | --condition) target=condition ;;
      -s | --select) target=select ;;
      -o | --odataUrl) target=odataUrl ;;
      -f | --from) target=fromDate ;;
      -u | --until) target=untilDate ;;
      -i | --increment) target=increment ;;
      -w | --wgetrc) target=WGETRC ;;
      -b | --batchsize) target=batchsize ;;
      -l | --limit) target=limit ;;
      *)
        echo "ERROR: unknown option '$arg'" >&2
        usage
        exit 1
        ;;
    esac

    # "shift 2" with a single remaining argument does not shift at all in
    # Bash, which would make this loop spin forever; reject it explicitly.
    if [ "$#" -lt 2 ]; then
      echo "ERROR: option '$arg' requires a value" >&2
      usage
      exit 1
    fi

    printf -v "$target" '%s' "$2"
    if [ "$target" = WGETRC ]; then
      export WGETRC
    fi
    shift 2
  done
}

parse_args "$@"
#######################################
# Percent-encode the characters this script expects in an OData filter
# (space, single quote and parentheses); everything else passes through.
# Arguments: $1 - filter expression
# Outputs:   encoded expression on stdout, without trailing newline
#######################################
urlencode_filter() {
  local text=$1
  text=${text// /%20}
  text=${text//\'/%27}
  text=${text//\(/%28}
  text=${text//\)/%29}
  printf '%s' "$text"
}

#######################################
# Query the DeletedProducts collection and print the result as CSV.
# Paging is done by advancing a DeletionDate window (each request asks for
# entries deleted after the last one received) rather than by following
# @odata.nextLink.
# Globals:   select, condition, odataUrl, fromDate, untilDate, increment,
#            batchsize, limit, quiet (read); SECONDS (reset)
# Arguments: none
# Outputs:   CSV header followed by one line per product on stdout;
#            progress and error messages on stderr
# Returns:   0 when the limit or the until date was reached,
#            1 when a request failed or did not return a JSON object
#######################################
fetch_deleted_products() {
  local fields csvjq querySelect startDate endDate nextEnd query
  local result status count lastDeletion
  local pos=0 headerPending=true rc=0

  # jq program: pick the selected fields of every product, join them with
  # commas. Checksum is an array in the response, so take its first value.
  fields=$(printf '%s' "$select" \
    | sed -e 's/,/, ./g' -e 's/Checksum/Checksum[0].Value/')
  csvjq=".value[] | [ .${fields} ] | @tsv | gsub(\"\\t\";\",\")"

  # The paging below needs DeletionDate in every response, so request it
  # even if the caller did not select it. The CSV is unaffected because
  # csvjq only emits the caller's fields.
  querySelect=$select
  case ",$select," in
    ,, | *,DeletionDate,*) ;;
    *) querySelect="$select,DeletionDate" ;;
  esac

  # Normalise the start date to microsecond precision.
  startDate="$(date -u -d "$fromDate" +%Y-%m-%dT%H:%M:%S.%N | cut -c1-26)Z"
  SECONDS=0

  while [ "$pos" -le "$limit" ]; do
    # The window ends one increment after its start, capped at untilDate.
    nextEnd="$(date -u +%Y-%m-%dT%H:%M:%S --date="$startDate + $increment").999999Z"
    if [ "$(date -d "$nextEnd" +%s)" -lt "$(date -d "$untilDate" +%s)" ]; then
      endDate=$nextEnd
    else
      endDate="$(date -u -d "$untilDate" +%Y-%m-%dT%H:%M:%S).999999Z"
    fi

    # Parenthesise the user condition: without it a top-level "or" would let
    # the date window apply to the last alternative only.
    query="${condition:+($condition) and }DeletionDate gt ${startDate} and DeletionDate lt ${endDate}"
    $quiet || echo "$(date -u -d "@$SECONDS" +%H:%M:%S) @$pos query: $query" >&2

    result=$(curl -s "$odataUrl/DeletedProducts?\$top=$batchsize&\$select=$querySelect&\$orderby=DeletionDate%20asc&\$filter=$(urlencode_filter "$query")")
    status=$?
    if [ "$status" -ne 0 ]; then
      echo "query failed with status=$status" >&2
      if [ -n "$result" ]; then
        printf '%s\n' "$result" >&2
      fi
      rc=1
      break
    fi

    # curl -s succeeds on HTTP error pages too, so make sure the body at
    # least looks like a JSON object before handing it to jq.
    if [ "${result:0:1}" != '{' ]; then
      printf 'JSON error: %s\n' "$result" >&2
      rc=1
      break
    fi

    # The header is written once, in front of the first batch.
    if $headerPending; then
      printf '%s\n' "$select"
      headerPending=false
    fi

    printf '%s\n' "$result" | jq -r "$csvjq"
    count=$(printf '%s\n' "$result" | jq -r '[.value[]?] | length')
    lastDeletion=$(printf '%s\n' "$result" | jq -r '.value[-1].DeletionDate // empty')

    if [ -n "$lastDeletion" ]; then
      # continue right after the last product received
      startDate=$lastDeletion
    else
      # nothing in this window: move on to the next one
      startDate=$endDate
    fi

    # advance retrieval position and stop when limit count reached
    pos=$((pos + ${count:-0}))
    if [ "$pos" -ge "$limit" ]; then
      $quiet || printf 'Limit reached @%s.' "$pos" >&2
      break
    fi

    # stop when limit date reached
    if [ "$(date -d "$startDate" +%s)" -ge "$(date -d "$untilDate" +%s)" ]; then
      break
    fi
  done

  return "$rc"
}

fetch_deleted_products
rc=$?
## finish printing progress line
$quiet || echo "" >&2
# Report a failed retrieval to the caller instead of always exiting 0.
[ "$rc" -eq 0 ] || exit "$rc"