Skip to content

Commit 9edf29e

Browse files
committed
add openvpn data formats & observations
1 parent b8c5d50 commit 9edf29e

11 files changed

Lines changed: 306 additions & 11 deletions

File tree

oonidata/src/oonidata/models/dataformats.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ class DNSQuery(BaseModel):
284284
dial_id: Optional[int] = None
285285

286286

287+
287288
@add_slots
288289
@dataclass
289290
class TCPConnectStatus(BaseModel):
@@ -365,3 +366,42 @@ class NetworkEvent(BaseModel):
365366
# Deprecated fields
366367
dial_id: Optional[int] = None
367368
conn_id: Optional[int] = None
369+
370+
371+
@add_slots
@dataclass
class OpenVPNHandshake(BaseModel):
    """Record describing a single OpenVPN handshake towards one endpoint.

    The first eight fields are required; the remaining ones default to
    ``None`` when the measurement does not carry them.
    """

    handshake_time: float
    endpoint: str
    ip: str  # we might want to make this optional, and scrub in favor of ASN/prefix
    port: int
    transport: str
    provider: str
    # NOTE(review): t0/t look like start/end offsets of the handshake;
    # confirm units and reference point against the experiment spec.
    t0: float
    t: float
    openvpn_options: Optional[Dict[str, str]] = None
    tags: Optional[List[str]] = None
    # Typed as int so that it agrees with OpenVPNNetworkEvent.transaction_id,
    # which lets handshakes and network events be correlated on the same key.
    transaction_id: Optional[int] = None
    failure: Failure = None
386+
387+
@add_slots
@dataclass
class OpenVPNPacket(BaseModel):
    """Metadata about one OpenVPN packet.

    Referenced by ``OpenVPNNetworkEvent.packet``. ``acks`` and
    ``send_attempts`` are optional and default to ``None``.
    """

    operation: str
    opcode: str
    # Shadows the ``id`` builtin inside the class body; the name is kept
    # because it is part of the model's public field set.
    id: int
    payload_size: int
    acks: Optional[List[int]] = None
    send_attempts: Optional[int] = None
396+
397+
398+
@add_slots
@dataclass
class OpenVPNNetworkEvent(BaseModel):
    """One network-level event recorded during an OpenVPN measurement.

    ``operation``, ``stage`` and ``t`` are always required; the packet
    details, tags and transaction id are optional.
    """

    operation: str
    stage: str
    t: float
    tags: Optional[List[str]] = None
    # Packet metadata, when the event is associated with a packet.
    packet: Optional[OpenVPNPacket] = None
    transaction_id: Optional[int] = None
407+

oonidata/src/oonidata/models/nettests/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from .whatsapp import Whatsapp
1414
from .http_invalid_request_line import HTTPInvalidRequestLine
1515
from .http_header_field_manipulation import HTTPHeaderFieldManipulation
16+
from .openvpn import OpenVPN
1617

1718
SUPPORTED_CLASSES = [
1819
HTTPHeaderFieldManipulation,
@@ -27,6 +28,7 @@
2728
Signal,
2829
FacebookMessenger,
2930
Whatsapp,
31+
OpenVPN,
3032
BaseMeasurement,
3133
]
3234
SupportedDataformats = Union[
@@ -42,6 +44,7 @@
4244
Signal,
4345
FacebookMessenger,
4446
Whatsapp,
47+
OpenVPN,
4548
BaseMeasurement,
4649
]
4750

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
from dataclasses import dataclass
2+
from typing import List, Optional
3+
4+
from ..base import BaseModel
5+
6+
from oonidata.compat import add_slots
7+
from oonidata.models.dataformats import (
8+
BaseTestKeys,
9+
Failure,
10+
TCPConnect,
11+
OpenVPNHandshake,
12+
OpenVPNNetworkEvent,
13+
)
14+
from oonidata.models.nettests.base_measurement import BaseMeasurement
15+
16+
17+
@add_slots
@dataclass
class OpenVPNTestKeys(BaseTestKeys):
    """Test keys of the ``openvpn`` nettest.

    Every field has a default, so an instance can be built from a
    measurement that omits any of them.
    """

    success: Optional[bool] = False
    failure: Failure = None

    # Per-measurement lists of lower level records.
    network_events: Optional[List[OpenVPNNetworkEvent]] = None
    tcp_connect: Optional[List[TCPConnect]] = None
    openvpn_handshake: Optional[List[OpenVPNHandshake]] = None

    bootstrap_time: Optional[float] = None
    # Optional[str] rather than plain str: the default is None, so the
    # annotation has to admit it.
    tunnel: Optional[str] = None
29+
30+
31+
@add_slots
@dataclass
class OpenVPN(BaseMeasurement):
    """Measurement model for the ``openvpn`` nettest."""

    # Test name this measurement class is registered under.
    __test_name__ = "openvpn"

    test_keys: OpenVPNTestKeys

oonidata/src/oonidata/models/observations.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,3 +383,88 @@ class HTTPMiddleboxObservation:
383383
hfm_diff: Optional[str] = None
384384
hfm_failure: Optional[str] = None
385385
hfm_success: Optional[bool] = None
386+
387+
388+
@table_model(
    table_name="obs_openvpn",
    table_index=("measurement_start_time", "measurement_uid", "observation_idx"),
)
@dataclass
class OpenVPNObservation:
    """Row model for the ``obs_openvpn`` table.

    Combines measurement/probe metadata with the outcome of the TCP
    connect and of the OpenVPN handshake for one observation.
    """

    measurement_meta: MeasurementMeta

    probe_meta: ProbeMeta

    observation_idx: int = 0

    created_at: Optional[datetime] = None

    # NOTE(review): annotated as a plain datetime but defaults to None.
    # The annotation is left untouched because it may drive the table
    # schema; confirm the processor always sets this field.
    timestamp: datetime = None

    # Fields added by the processor

    ip: str = ""
    port: int = 0
    transport: str = ""

    success: bool = False
    failure: Failure = None

    protocol: str = ""
    variant: Optional[str] = None

    # TCP related observation
    tcp_failure: Optional[Failure] = None
    tcp_success: Optional[bool] = None
    tcp_t: Optional[float] = None

    # OpenVPN handshake observation
    openvpn_handshake_failure: Optional[Failure] = None
    openvpn_handshake_t: Optional[float] = None
    openvpn_handshake_t0: Optional[float] = None
    openvpn_bootstrap_time: Optional[float] = None

    # timing info about the handshake packets
    openvpn_handshake_hr_client_t: Optional[float] = None
    openvpn_handshake_hr_server_t: Optional[float] = None
    openvpn_handshake_clt_hello_t: Optional[float] = None
    openvpn_handshake_srv_hello_t: Optional[float] = None
    openvpn_handshake_key_exchg_n: Optional[int] = None
    # The double underscore in the next two names is kept as-is: renaming
    # a field would change the model's public attribute names.
    openvpn_handshake_got_keys__t: Optional[float] = None
    openvpn_handshake_gen_keys__t: Optional[float] = None
439+
440+
441+
442+
443+
@table_model(
    table_name="obs_tunnel",
    # NOTE(review): "observation_idx" is indexed here but no such field is
    # declared in this class body; confirm it is provided elsewhere.
    table_index=("measurement_uid", "observation_idx", "measurement_start_time"),
)
@dataclass
class TunnelEndpointObservation:
    """Row model for the ``obs_tunnel`` table: one tunnel endpoint outcome.

    Only ``variant`` and ``provider`` are optional; every other field must
    be supplied by the caller.
    """

    measurement_meta: MeasurementMeta
    probe_meta: ProbeMeta

    measurement_start_time: datetime

    ip: str
    port: int
    transport: str

    # definition of success will need to change when/if we're able to gather metrics
    # through the tunnel.
    success: bool
    failure: Failure

    protocol: str
    family: str

    # indicates obfuscation or modifications from the main protocol family.
    variant: Optional[str] = None

    # any metadata about the providers behind the endpoints.
    provider: Optional[str] = None

oonipipeline/Design.md

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ needed.
1212

1313
### Expose a queriable low level view on measurements
1414

15-
Currently it's only possible to query measurement at a granuliaty which is as
16-
fine a measurement.
15+
Currently it's only possible to query measurements at a granularity which is as
16+
fine as a measurement.
1717

1818
This means that it's only possible to answer questions which the original
19-
designer of the experiment had already throught of.
19+
designer of the experiment had already thought of.
2020

2121
On the other hand the new pipeline breaks down measurements into distinct
2222
observations (think 1 DNS query and answer or 1 TLS handshake towards a
@@ -145,16 +145,17 @@ port combination.
145145

146146
You can run the observation generation with a clickhouse backend like so:
147147

148+
TODO(art): check this is correct.
149+
148150
```
149-
poetry run python -m oonidata mkobs --clickhouse clickhouse://localhost/ --data-dir tests/data/datadir/ --start-day 2022-08-01 --end-day 2022-10-01 --create-tables --parallelism 20
151+
hatch run oonipipeline --probe-cc US --test-name signal --workflow-name observations --start-at 2022-08-01 --end-at 2022-10-01
150152
```
151153

152154
Here is the list of supported observations so far:
153155

154156
- [x] WebObservation, which has information about DNS, TCP, TLS and HTTP(s)
155157
- [x] WebControlObservation, has the control measurements run by web connectivity (is used to generate ground truths)
156-
- [ ] CircumventionToolObservation, still needs to be designed and implemented
157-
(ideally we would use the same for OpenVPN, Psiphon, VanillaTor)
158+
- [x] OpenVPNObservation, with measurements run by the openvpn experiment.
158159

159160
### Response body archiving
160161

oonipipeline/Readme.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ For historical context, these are the major revisions:
88
- `v1` - OONI Pipeline based on custom CLI scripts using mongodb as a backend. Used until ~2015.
99
- `v2` - OONI Pipeline based on [luigi](https://luigi.readthedocs.io/en/stable/). Used until ~2017.
1010
- `v3` - OONI Pipeline based on [airflow](https://airflow.apache.org/). Used until ~2020.
11-
- `v4` - OONI Pipeline basedon custom script and systemd units (aka fastpath). Currently in use in production.
11+
- `v4` - OONI Pipeline based on custom script and systemd units (aka fastpath). Currently in use in production.
1212
- `v5` - Next generation OONI Pipeline. What this readme is relevant to. Expected to be in production by Q4 2024.
1313

1414
## Setup
@@ -41,13 +41,19 @@ clickhouse server
4141

4242
Workflows are started by first scheduling them and then triggering a backfill operation on them. When they are scheduled they will also run on a daily basis.
4343

44+
4445
```
45-
hatch run oonipipeline schedule --probe-cc US --test-name signal --create-tables
46+
hatch run oonipipeline schedule --probe-cc US --test-name signal
4647
```
4748

4849
You can then trigger the backfill operation like so:
4950
```
50-
hatch run oonipipeline backfill --probe-cc US --test-name signal --workflow-name observations --start-at 2024-01-01 --end-at 2024-02-01
51+
hatch run oonipipeline backfill --create-tables --probe-cc US --test-name signal --workflow-name observations --start-at 2024-01-01 --end-at 2024-02-01
52+
```
53+
54+
If you need to re-create the database tables (because the schema has changed), add the `--drop-tables` flag to the invocation:
55+
```
56+
hatch run oonipipeline backfill --create-tables --drop-tables --probe-cc US --test-name signal --workflow-name observations --start-at 2024-01-01 --end-at 2024-02-01
5157
```
5258

5359
You will then need some workers to actually perform the task you backfilled, these can be started like so:

oonipipeline/src/oonipipeline/cli/commands.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ async def main():
189189
@click.option(
190190
"--analysis/--no-analysis",
191191
default=True,
192-
help="should we drop tables before creating them",
192+
help="schedule analysis too",
193193
)
194194
def schedule(probe_cc: List[str], test_name: List[str], analysis: bool):
195195
"""

oonipipeline/src/oonipipeline/db/create_tables.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
WebControlObservation,
2929
WebObservation,
3030
HTTPMiddleboxObservation,
31+
OpenVPNObservation,
3132
)
3233

3334
from .connections import ClickhouseConnection
@@ -170,6 +171,7 @@ def format_create_query(
170171
table_models = [
171172
WebObservation,
172173
WebControlObservation,
174+
OpenVPNObservation,
173175
HTTPMiddleboxObservation,
174176
WebAnalysis,
175177
MeasurementExperimentResult,

0 commit comments

Comments
 (0)