@@ -2,35 +2,46 @@ use libc::{
22 c_char, c_int, ifreq, IFF_NO_PI , IFF_TAP , IFF_VNET_HDR , TUN_F_CSUM , TUN_F_TSO4 , TUN_F_TSO6 ,
33 TUN_F_UFO ,
44} ;
5- use nix:: fcntl:: { fcntl , open, FcntlArg , OFlag } ;
5+ use nix:: fcntl:: { open, OFlag } ;
66use nix:: sys:: stat:: Mode ;
7- use nix:: unistd :: { read , write } ;
7+ use nix:: sys :: uio :: { readv , writev } ;
88use nix:: { ioctl_write_int, ioctl_write_ptr} ;
9- use std:: os:: fd:: { AsRawFd , OwnedFd , RawFd } ;
9+ use std:: os:: fd:: { AsFd , AsRawFd , OwnedFd , RawFd } ;
1010use std:: { io, mem, ptr} ;
11+ use utils:: fd:: SetNonblockingExt ;
1112use virtio_bindings:: virtio_net:: {
1213 VIRTIO_NET_F_GUEST_CSUM , VIRTIO_NET_F_GUEST_TSO4 , VIRTIO_NET_F_GUEST_TSO6 ,
1314 VIRTIO_NET_F_GUEST_UFO ,
1415} ;
16+ use vm_memory:: GuestMemoryMmap ;
1517
1618use super :: backend:: { ConnectError , NetBackend , ReadError , WriteError } ;
17- use super :: { write_virtio_net_hdr, FRAME_HEADER_LEN } ;
19+ use crate :: virtio:: queue:: Queue ;
20+ use crate :: virtio:: rx_queue_producer:: RxQueueProducer ;
21+ use crate :: virtio:: tx_queue_consumer:: { Consumed , TxQueueConsumer } ;
22+ use crate :: virtio:: InterruptTransport ;
1823
1924ioctl_write_ptr ! ( tunsetiff, b'T' , 202 , c_int) ;
2025ioctl_write_int ! ( tunsetoffload, b'T' , 208 ) ;
2126ioctl_write_ptr ! ( tunsetvnethdrsz, b'T' , 216 , c_int) ;
2227
28+ const MAX_BATCH : usize = 256 ;
29+
2330pub struct Tap {
2431 fd : OwnedFd ,
25- include_vnet_header : bool ,
32+ tx_consumer : TxQueueConsumer ,
33+ rx_producer : RxQueueProducer ,
2634}
2735
2836impl Tap {
2937 /// Create an endpoint using the file descriptor of a tap device
3038 pub fn new (
3139 tap_name : String ,
3240 vnet_features : u64 ,
33- include_vnet_header : bool ,
41+ tx_queue : Queue ,
42+ rx_queue : Queue ,
43+ mem : GuestMemoryMmap ,
44+ interrupt : InterruptTransport ,
3445 ) -> Result < Self , ConnectError > {
3546 let fd = match open ( "/dev/net/tun" , OFlag :: O_RDWR , Mode :: empty ( ) ) {
3647 Ok ( fd) => fd,
@@ -47,10 +58,9 @@ impl Tap {
4758 ) ;
4859 }
4960
50- req. ifr_ifru . ifru_flags = IFF_TAP as i16 | IFF_NO_PI as i16 ;
51- if include_vnet_header {
52- req. ifr_ifru . ifru_flags |= IFF_VNET_HDR as i16 ;
53- }
61+ req. ifr_ifru . ifru_flags = IFF_TAP as i16 | IFF_NO_PI as i16 | IFF_VNET_HDR as i16 ;
62+
63+ log:: info!( "Tap::new() fd={} tap={}" , fd. as_raw_fd( ) , tap_name) ;
5464
5565 let mut offload_flags: u64 = 0 ;
5666 if ( vnet_features & ( 1 << VIRTIO_NET_F_GUEST_CSUM ) ) != 0 {
@@ -71,7 +81,7 @@ impl Tap {
7181 return Err ( ConnectError :: TunSetIff ( io:: Error :: from ( err) ) ) ;
7282 }
7383
74- // TODO(slp): replace hardcoded vnet size with cons
84+ // TODO(slp): replace hardcoded vnet size with const
7585 if let Err ( err) = tunsetvnethdrsz ( fd. as_raw_fd ( ) , & 12 ) {
7686 return Err ( ConnectError :: TunSetVnetHdrSz ( io:: Error :: from ( err) ) ) ;
7787 }
@@ -81,67 +91,71 @@ impl Tap {
8191 }
8292 }
8393
84- match fcntl ( & fd, FcntlArg :: F_GETFL ) {
85- Ok ( flags) => {
86- if let Err ( e) = fcntl (
87- & fd,
88- FcntlArg :: F_SETFL ( OFlag :: from_bits_truncate ( flags) | OFlag :: O_NONBLOCK ) ,
89- ) {
90- warn ! ( "error switching to non-blocking: id={fd:?}, err={e}" ) ;
91- }
92- }
93- Err ( e) => error ! ( "couldn't obtain fd flags id={fd:?}, err={e}" ) ,
94- } ;
94+ if let Err ( e) = fd. set_nonblocking ( true ) {
95+ log:: warn!( "Failed to set O_NONBLOCK on tap: {e}" ) ;
96+ }
97+
98+ let tx_consumer = TxQueueConsumer :: new ( tx_queue, mem. clone ( ) , interrupt. clone ( ) ) ;
99+ let rx_provider = RxQueueProducer :: new ( rx_queue, mem, interrupt) ;
95100
96101 Ok ( Self {
97102 fd,
98- include_vnet_header,
103+ tx_consumer,
104+ rx_producer : rx_provider,
99105 } )
100106 }
101107}
102108
103109impl NetBackend for Tap {
104- /// Try to read a frame from the tap devie. If no bytes are available reports
105- /// ReadError::NothingRead.
106- fn read_frame ( & mut self , buf : & mut [ u8 ] ) -> Result < usize , ReadError > {
107- let buf_offset = if !self . include_vnet_header {
108- write_virtio_net_hdr ( buf)
109- } else {
110- 0
111- } ;
112-
113- let frame_length = match read ( & self . fd , & mut buf[ buf_offset..] ) {
114- Ok ( f) => f,
115- #[ allow( unreachable_patterns) ]
116- Err ( nix:: Error :: EAGAIN | nix:: Error :: EWOULDBLOCK ) => {
117- return Err ( ReadError :: NothingRead )
118- }
119- Err ( e) => {
120- return Err ( ReadError :: Internal ( e) ) ;
110+ fn send ( & mut self ) -> Result < ( ) , WriteError > {
111+ let fd = self . fd . as_fd ( ) ;
112+
113+ self . tx_consumer . feed ( MAX_BATCH ) ;
114+
115+ // Each descriptor chain is one packet. TAP's writev combines iovecs into
116+ // a single packet (scatter-gather), so we can use it directly without
117+ // flattening. One writev call per packet.
118+ let _ = self . tx_consumer . consume ( |frames| {
119+ let mut total = 0usize ;
120+ for frame in frames. iter ( ) {
121+ if frame. is_empty ( ) {
122+ continue ;
123+ }
124+ match writev ( fd, frame) {
125+ Ok ( n) => total += n,
126+ Err ( nix:: errno:: Errno :: EAGAIN ) => break ,
127+ Err ( nix:: errno:: Errno :: EPIPE ) => return Err ( WriteError :: ProcessNotRunning ) ,
128+ Err ( e) => {
129+ log:: error!( "Tap TX failed: {:?}" , e) ;
130+ return Err ( WriteError :: Internal ( e) ) ;
131+ }
132+ }
121133 }
122- } ;
123- debug ! ( "Read eth frame from tap: {frame_length} bytes" ) ;
124- Ok ( buf_offset + frame_length)
125- }
134+ Ok ( Consumed :: Bytes ( total) )
135+ } ) ?;
126136
127- /// Try to write a frame to the tap device.
128- fn write_frame ( & mut self , hdr_len : usize , buf : & mut [ u8 ] ) -> Result < ( ) , WriteError > {
129- let buf_offset = if !self . include_vnet_header {
130- hdr_len
131- } else {
132- FRAME_HEADER_LEN
133- } ;
134- let ret = write ( & self . fd , buf[ buf_offset..] ) . map_err ( WriteError :: Internal ) ?;
135- debug ! ( "Written frame size={}, written={}" , buf. len( ) , ret) ;
136137 Ok ( ( ) )
137138 }
138139
139- fn has_unfinished_write ( & self ) -> bool {
140- false
141- }
140+ fn recv ( & mut self ) -> Result < ( ) , ReadError > {
141+ let fd = self . fd . as_fd ( ) ;
142+
143+ self . rx_producer . feed ( MAX_BATCH ) ;
144+
145+ self . rx_producer . produce ( |chains, completer| {
146+ for ( i, chain) in chains. iter_mut ( ) . enumerate ( ) {
147+ if chain. is_empty ( ) {
148+ warn ! ( "Chain {i} was empty" ) ;
149+ break ;
150+ }
151+
152+ match readv ( fd, chain) {
153+ Ok ( n) => completer. complete ( chain, i, n) ,
154+ Err ( _) => break , // EAGAIN or error, stop receiving
155+ }
156+ }
157+ } ) ;
142158
143- fn try_finish_write ( & mut self , _hdr_len : usize , _buf : & [ u8 ] ) -> Result < ( ) , WriteError > {
144- // The tap backend doesn't do partial writes.
145159 Ok ( ( ) )
146160 }
147161
0 commit comments