debugging, add async_tag_lock

This commit is contained in:
John Smith
2022-10-02 18:47:36 -04:00
parent 046b61d5d8
commit 0a01c0d23e
21 changed files with 690 additions and 273 deletions
@@ -4,7 +4,7 @@ use super::*;
pub struct ConnectionHandle {
id: u64,
descriptor: ConnectionDescriptor,
channel: flume::Sender<Vec<u8>>,
channel: flume::Sender<(Option<Id>, Vec<u8>)>,
}
#[derive(Debug)]
@@ -17,7 +17,7 @@ impl ConnectionHandle {
pub(super) fn new(
id: u64,
descriptor: ConnectionDescriptor,
channel: flume::Sender<Vec<u8>>,
channel: flume::Sender<(Option<Id>, Vec<u8>)>,
) -> Self {
Self {
id,
@@ -34,16 +34,22 @@ impl ConnectionHandle {
self.descriptor.clone()
}
#[instrument(level="trace", skip(self, message), fields(message.len = message.len()))]
pub fn send(&self, message: Vec<u8>) -> ConnectionHandleSendResult {
match self.channel.send(message) {
match self.channel.send((Span::current().id(), message)) {
Ok(()) => ConnectionHandleSendResult::Sent,
Err(e) => ConnectionHandleSendResult::NotSent(e.0),
Err(e) => ConnectionHandleSendResult::NotSent(e.0 .1),
}
}
#[instrument(level="trace", skip(self, message), fields(message.len = message.len()))]
pub async fn send_async(&self, message: Vec<u8>) -> ConnectionHandleSendResult {
match self.channel.send_async(message).await {
match self
.channel
.send_async((Span::current().id(), message))
.await
{
Ok(()) => ConnectionHandleSendResult::Sent,
Err(e) => ConnectionHandleSendResult::NotSent(e.0),
Err(e) => ConnectionHandleSendResult::NotSent(e.0 .1),
}
}
}
@@ -140,6 +140,7 @@ impl ConnectionManager {
// Internal routine to register new connection atomically.
// Registers connection in the connection table for later access
// and spawns a message processing loop for the connection
#[instrument(level = "trace", skip(self, inner), ret, err)]
fn on_new_protocol_network_connection(
&self,
inner: &mut ConnectionManagerInner,
@@ -195,6 +196,7 @@ impl ConnectionManager {
}
// Returns a network connection if one already is established
#[instrument(level = "trace", skip(self), ret)]
pub fn get_connection(&self, descriptor: ConnectionDescriptor) -> Option<ConnectionHandle> {
self.arc
.connection_table
@@ -234,40 +236,43 @@ impl ConnectionManager {
did_kill
}
// Called when we want to create a new connection or get the current one that already exists
// This will kill off any connections that are in conflict with the new connection to be made
// in order to make room for the new connection in the system's connection table
// This routine needs to be atomic, or connections may exist in the table that are not established
/// Locak remote address
// async fn lock_remote_address(&self, remote_addr: SocketAddr) -> {
// }
/// Called when we want to create a new connection or get the current one that already exists
/// This will kill off any connections that are in conflict with the new connection to be made
/// in order to make room for the new connection in the system's connection table
/// This routine needs to be atomic, or connections may exist in the table that are not established
#[instrument(level = "trace", skip(self), ret, err)]
pub async fn get_or_create_connection(
&self,
local_addr: Option<SocketAddr>,
dial_info: DialInfo,
) -> EyreResult<NetworkResult<ConnectionHandle>> {
log_net!(
warn!(
"== get_or_create_connection local_addr={:?} dial_info={:?}",
local_addr.green(),
dial_info.green()
);
// Make a connection descriptor for this dialinfo
let peer_address = dial_info.to_peer_address();
// Async lock on the remote address for atomicity
//let _lock_guard = self.lock_remote_address(peer_address.to_socket_addr());
// Kill off any possibly conflicting connections
let did_kill = self.kill_off_colliding_connections(&dial_info).await;
let mut retry_count = if did_kill { 2 } else { 0 };
// Make a connection descriptor for this dialinfo
let peer_address = dial_info.to_peer_address();
let descriptor = match local_addr {
Some(la) => {
ConnectionDescriptor::new(peer_address, SocketAddress::from_socket_addr(la))
}
None => ConnectionDescriptor::new_no_local(peer_address),
};
// If any connection to this remote exists that has the same protocol, return it
// Any connection will do, we don't have to match the local address
if let Some(conn) = self
.arc
.connection_table
.get_last_connection_by_remote(descriptor.remote())
.get_last_connection_by_remote(peer_address)
{
log_net!(
"== Returning existing connection local_addr={:?} peer_address={:?}",
@@ -378,6 +383,7 @@ impl ConnectionManager {
// Callback from network connection receive loop when it exits
// cleans up the entry in the connection table
#[instrument(level = "trace", skip(self))]
pub(super) async fn report_connection_finished(&self, connection_id: u64) {
// Get channel sender
let sender = {
@@ -72,6 +72,7 @@ impl ConnectionTable {
}
}
#[instrument(level = "trace", skip(self))]
pub async fn join(&self) {
let mut unord = {
let mut inner = self.inner.lock();
@@ -90,6 +91,7 @@ impl ConnectionTable {
while unord.next().await.is_some() {}
}
#[instrument(level = "trace", skip(self), ret, err)]
pub fn add_connection(
&self,
network_connection: NetworkConnection,
@@ -156,6 +158,7 @@ impl ConnectionTable {
Ok(out_conn)
}
#[instrument(level = "trace", skip(self), ret)]
pub fn get_connection_by_id(&self, id: NetworkConnectionId) -> Option<ConnectionHandle> {
let mut inner = self.inner.lock();
let protocol_index = *inner.protocol_index_by_id.get(&id)?;
@@ -163,6 +166,7 @@ impl ConnectionTable {
Some(out.get_handle())
}
#[instrument(level = "trace", skip(self), ret)]
pub fn get_connection_by_descriptor(
&self,
descriptor: ConnectionDescriptor,
@@ -175,6 +179,7 @@ impl ConnectionTable {
Some(out.get_handle())
}
#[instrument(level = "trace", skip(self), ret)]
pub fn get_last_connection_by_remote(&self, remote: PeerAddress) -> Option<ConnectionHandle> {
let mut inner = self.inner.lock();
@@ -184,7 +189,8 @@ impl ConnectionTable {
Some(out.get_handle())
}
pub fn _get_connection_ids_by_remote(&self, remote: PeerAddress) -> Vec<NetworkConnectionId> {
#[instrument(level = "trace", skip(self), ret)]
pub fn get_connection_ids_by_remote(&self, remote: PeerAddress) -> Vec<NetworkConnectionId> {
let inner = self.inner.lock();
inner
.ids_by_remote
@@ -219,6 +225,7 @@ impl ConnectionTable {
inner.conn_by_id.iter().fold(0, |acc, c| acc + c.len())
}
#[instrument(level = "trace", skip(inner), ret)]
fn remove_connection_records(
inner: &mut ConnectionTableInner,
id: NetworkConnectionId,
@@ -251,6 +258,7 @@ impl ConnectionTable {
conn
}
#[instrument(level = "trace", skip(self), ret)]
pub fn remove_connection_by_id(&self, id: NetworkConnectionId) -> Option<NetworkConnection> {
let mut inner = self.inner.lock();
+79 -73
View File
@@ -1321,88 +1321,93 @@ impl NetworkManager {
data: Vec<u8>,
) -> SendPinBoxFuture<EyreResult<NetworkResult<SendDataKind>>> {
let this = self.clone();
Box::pin(async move {
// info!("{}", format!("send_data to: {:?}", node_ref).red());
Box::pin(
async move {
// info!("{}", format!("send_data to: {:?}", node_ref).red());
// First try to send data to the last socket we've seen this peer on
let data = if let Some(connection_descriptor) = node_ref.last_connection() {
// info!(
// "{}",
// format!("last_connection to: {:?}", connection_descriptor).red()
// );
// First try to send data to the last socket we've seen this peer on
let data = if let Some(connection_descriptor) = node_ref.last_connection() {
// info!(
// "{}",
// format!("last_connection to: {:?}", connection_descriptor).red()
// );
match this
.net()
.send_data_to_existing_connection(connection_descriptor, data)
.await?
{
None => {
// info!(
// "{}",
// format!("sent to existing connection: {:?}", connection_descriptor)
// .red()
// );
match this
.net()
.send_data_to_existing_connection(connection_descriptor, data)
.await?
{
None => {
// info!(
// "{}",
// format!("sent to existing connection: {:?}", connection_descriptor)
// .red()
// );
// Update timestamp for this last connection since we just sent to it
// Update timestamp for this last connection since we just sent to it
node_ref
.set_last_connection(connection_descriptor, intf::get_timestamp());
return Ok(NetworkResult::value(SendDataKind::Existing(
connection_descriptor,
)));
}
Some(d) => d,
}
} else {
data
};
// info!("{}", "no existing connection".red());
// If we don't have last_connection, try to reach out to the peer via its dial info
let contact_method = this.get_contact_method(node_ref.clone());
log_net!(
"send_data via {:?} to dialinfo {:?}",
contact_method,
node_ref
);
match contact_method {
ContactMethod::OutboundRelay(relay_nr)
| ContactMethod::InboundRelay(relay_nr) => {
network_result_try!(this.send_data(relay_nr, data).await?);
Ok(NetworkResult::value(SendDataKind::Indirect))
}
ContactMethod::Direct(dial_info) => {
let connection_descriptor = network_result_try!(
this.net().send_data_to_dial_info(dial_info, data).await?
);
// If we connected to this node directly, save off the last connection so we can use it again
node_ref.set_last_connection(connection_descriptor, intf::get_timestamp());
return Ok(NetworkResult::value(SendDataKind::Existing(
Ok(NetworkResult::value(SendDataKind::Direct(
connection_descriptor,
)));
)))
}
Some(d) => d,
ContactMethod::SignalReverse(relay_nr, target_node_ref) => {
let connection_descriptor = network_result_try!(
this.do_reverse_connect(relay_nr, target_node_ref, data)
.await?
);
Ok(NetworkResult::value(SendDataKind::Direct(
connection_descriptor,
)))
}
ContactMethod::SignalHolePunch(relay_nr, target_node_ref) => {
let connection_descriptor = network_result_try!(
this.do_hole_punch(relay_nr, target_node_ref, data).await?
);
Ok(NetworkResult::value(SendDataKind::Direct(
connection_descriptor,
)))
}
ContactMethod::Unreachable => Ok(NetworkResult::no_connection_other(
"Can't send to this node",
)),
}
} else {
data
};
// info!("{}", "no existing connection".red());
// If we don't have last_connection, try to reach out to the peer via its dial info
let contact_method = this.get_contact_method(node_ref.clone());
log_net!(
"send_data via {:?} to dialinfo {:?}",
contact_method,
node_ref
);
match contact_method {
ContactMethod::OutboundRelay(relay_nr) | ContactMethod::InboundRelay(relay_nr) => {
network_result_try!(this.send_data(relay_nr, data).await?);
Ok(NetworkResult::value(SendDataKind::Indirect))
}
ContactMethod::Direct(dial_info) => {
let connection_descriptor = network_result_try!(
this.net().send_data_to_dial_info(dial_info, data).await?
);
// If we connected to this node directly, save off the last connection so we can use it again
node_ref.set_last_connection(connection_descriptor, intf::get_timestamp());
Ok(NetworkResult::value(SendDataKind::Direct(
connection_descriptor,
)))
}
ContactMethod::SignalReverse(relay_nr, target_node_ref) => {
let connection_descriptor = network_result_try!(
this.do_reverse_connect(relay_nr, target_node_ref, data)
.await?
);
Ok(NetworkResult::value(SendDataKind::Direct(
connection_descriptor,
)))
}
ContactMethod::SignalHolePunch(relay_nr, target_node_ref) => {
let connection_descriptor = network_result_try!(
this.do_hole_punch(relay_nr, target_node_ref, data).await?
);
Ok(NetworkResult::value(SendDataKind::Direct(
connection_descriptor,
)))
}
ContactMethod::Unreachable => Ok(NetworkResult::no_connection_other(
"Can't send to this node",
)),
}
})
.instrument(trace_span!("send_data")),
)
}
// Direct bootstrap request handler (separate fallback mechanism from cheaper TXT bootstrap mechanism)
@@ -1466,6 +1471,7 @@ impl NetworkManager {
// Called when a packet potentially containing an RPC envelope is received by a low-level
// network protocol handler. Processes the envelope, authenticates and decrypts the RPC message
// and passes it to the RPC handler
#[instrument(level = "trace", ret, err, skip(self, data), fields(data.len = data.len()))]
async fn on_recv_envelope(
&self,
data: &[u8],
@@ -42,7 +42,8 @@ impl Network {
&self,
tls_acceptor: &TlsAcceptor,
stream: AsyncPeekStream,
addr: SocketAddr,
peer_addr: SocketAddr,
local_addr: SocketAddr,
protocol_handlers: &[Box<dyn ProtocolAcceptHandler>],
tls_connection_initial_timeout_ms: u32,
) -> EyreResult<Option<ProtocolNetworkConnection>> {
@@ -65,18 +66,20 @@ impl Network {
.wrap_err("tls initial timeout")?
.wrap_err("failed to peek tls stream")?;
self.try_handlers(ps, addr, protocol_handlers).await
self.try_handlers(ps, peer_addr, local_addr, protocol_handlers)
.await
}
async fn try_handlers(
&self,
stream: AsyncPeekStream,
addr: SocketAddr,
peer_addr: SocketAddr,
local_addr: SocketAddr,
protocol_accept_handlers: &[Box<dyn ProtocolAcceptHandler>],
) -> EyreResult<Option<ProtocolNetworkConnection>> {
for ah in protocol_accept_handlers.iter() {
if let Some(nc) = ah
.on_accept(stream.clone(), addr)
.on_accept(stream.clone(), peer_addr, local_addr)
.await
.wrap_err("io error")?
{
@@ -105,21 +108,35 @@ impl Network {
}
};
// XXX
warn!(
"DEBUGACCEPT: local={} remote={}",
tcp_stream.local_addr().unwrap(),
tcp_stream.peer_addr().unwrap(),
);
let listener_state = listener_state.clone();
let connection_manager = connection_manager.clone();
// Limit the number of connections from the same IP address
// and the number of total connections
let addr = match tcp_stream.peer_addr() {
let peer_addr = match tcp_stream.peer_addr() {
Ok(addr) => addr,
Err(e) => {
log_net!(debug "failed to get peer address: {}", e);
return;
}
};
let local_addr = match tcp_stream.local_addr() {
Ok(addr) => addr,
Err(e) => {
log_net!(debug "failed to get local address: {}", e);
return;
}
};
// XXX limiting here instead for connection table? may be faster and avoids tls negotiation
log_net!("TCP connection from: {}", addr);
log_net!("TCP connection from: {}", peer_addr);
// Create a stream we can peek on
#[cfg(feature = "rt-tokio")]
@@ -139,7 +156,7 @@ impl Network {
{
// If we fail to get a packet within the connection initial timeout
// then we punt this connection
log_net!("connection initial timeout from: {:?}", addr);
log_net!("connection initial timeout from: {:?}", peer_addr);
return;
}
@@ -152,29 +169,30 @@ impl Network {
self.try_tls_handlers(
ls.tls_acceptor.as_ref().unwrap(),
ps,
addr,
peer_addr,
local_addr,
&ls.tls_protocol_handlers,
tls_connection_initial_timeout_ms,
)
.await
} else {
self.try_handlers(ps, addr, &ls.protocol_accept_handlers)
self.try_handlers(ps, peer_addr, local_addr, &ls.protocol_accept_handlers)
.await
};
let conn = match conn {
Ok(Some(c)) => {
log_net!("protocol handler found for {:?}: {:?}", addr, c);
log_net!("protocol handler found for {:?}: {:?}", peer_addr, c);
c
}
Ok(None) => {
// No protocol handlers matched? drop it.
log_net!(debug "no protocol handler for connection from {:?}", addr);
log_net!(debug "no protocol handler for connection from {:?}", peer_addr);
return;
}
Err(e) => {
// Failed to negotiate connection? drop it.
log_net!(debug "failed to negotiate connection from {:?}: {}", addr, e);
log_net!(debug "failed to negotiate connection from {:?}: {}", peer_addr, e);
return;
}
};
@@ -311,7 +329,6 @@ impl Network {
.push(new_protocol_accept_handler(
self.network_manager().config(),
true,
addr,
));
} else {
ls.write()
@@ -319,7 +336,6 @@ impl Network {
.push(new_protocol_accept_handler(
self.network_manager().config(),
false,
addr,
));
}
@@ -34,6 +34,7 @@ cfg_if! {
}
}
#[instrument(level = "trace", ret, err)]
pub fn new_unbound_shared_udp_socket(domain: Domain) -> io::Result<Socket> {
let socket = Socket::new(domain, Type::DGRAM, Some(Protocol::UDP))?;
if domain == Domain::IPV6 {
@@ -49,6 +50,7 @@ pub fn new_unbound_shared_udp_socket(domain: Domain) -> io::Result<Socket> {
Ok(socket)
}
#[instrument(level = "trace", ret, err)]
pub fn new_bound_shared_udp_socket(local_address: SocketAddr) -> io::Result<Socket> {
let domain = Domain::for_address(local_address);
let socket = new_unbound_shared_udp_socket(domain)?;
@@ -60,6 +62,7 @@ pub fn new_bound_shared_udp_socket(local_address: SocketAddr) -> io::Result<Sock
Ok(socket)
}
#[instrument(level = "trace", ret, err)]
pub fn new_bound_first_udp_socket(local_address: SocketAddr) -> io::Result<Socket> {
let domain = Domain::for_address(local_address);
let socket = Socket::new(domain, Type::DGRAM, Some(Protocol::UDP))?;
@@ -93,6 +96,7 @@ pub fn new_bound_first_udp_socket(local_address: SocketAddr) -> io::Result<Socke
Ok(socket)
}
#[instrument(level = "trace", ret, err)]
pub fn new_unbound_shared_tcp_socket(domain: Domain) -> io::Result<Socket> {
let socket = Socket::new(domain, Type::STREAM, Some(Protocol::TCP))?;
if let Err(e) = socket.set_linger(Some(core::time::Duration::from_secs(0))) {
@@ -114,6 +118,7 @@ pub fn new_unbound_shared_tcp_socket(domain: Domain) -> io::Result<Socket> {
Ok(socket)
}
#[instrument(level = "trace", ret, err)]
pub fn new_bound_shared_tcp_socket(local_address: SocketAddr) -> io::Result<Socket> {
let domain = Domain::for_address(local_address);
let socket = new_unbound_shared_tcp_socket(domain)?;
@@ -125,6 +130,7 @@ pub fn new_bound_shared_tcp_socket(local_address: SocketAddr) -> io::Result<Sock
Ok(socket)
}
#[instrument(level = "trace", ret, err)]
pub fn new_bound_first_tcp_socket(local_address: SocketAddr) -> io::Result<Socket> {
let domain = Domain::for_address(local_address);
@@ -166,6 +172,7 @@ pub fn new_bound_first_tcp_socket(local_address: SocketAddr) -> io::Result<Socke
}
// Non-blocking connect is tricky when you want to start with a prepared socket
#[instrument(level = "trace", ret, err)]
pub async fn nonblocking_connect(
socket: Socket,
addr: SocketAddr,
@@ -177,6 +184,9 @@ pub async fn nonblocking_connect(
// Make socket2 SockAddr
let socket2_addr = socket2::SockAddr::from(addr);
// XXX
let bind_local_addr = socket.local_addr().unwrap().as_socket().unwrap();
// Connect to the remote address
match socket.connect(&socket2_addr) {
Ok(()) => Ok(()),
@@ -184,7 +194,27 @@ pub async fn nonblocking_connect(
Err(err) if err.raw_os_error() == Some(libc::EINPROGRESS) => Ok(()),
Err(err) if err.kind() == std::io::ErrorKind::WouldBlock => Ok(()),
Err(e) => Err(e),
}?;
}
.map_err(|e| {
// XXX
warn!(
"DEBUGCONNECT XXXFAILXXX: bind={} local={} remote={}\nbacktrace={:?}",
bind_local_addr,
socket.local_addr().unwrap().as_socket().unwrap(),
addr,
backtrace::Backtrace::new(),
);
e
})?;
// XXX
warn!(
"DEBUGCONNECT: bind={} local={} remote={}\nbacktrace={:?}",
bind_local_addr,
socket.local_addr().unwrap().as_socket().unwrap(),
addr,
backtrace::Backtrace::new(),
);
let async_stream = Async::new(std::net::TcpStream::from(socket))?;
@@ -99,30 +99,20 @@ impl RawTcpNetworkConnection {
///////////////////////////////////////////////////////////
///
struct RawTcpProtocolHandlerInner {
local_address: SocketAddr,
}
#[derive(Clone)]
pub struct RawTcpProtocolHandler
where
Self: ProtocolAcceptHandler,
{
connection_initial_timeout_ms: u32,
inner: Arc<Mutex<RawTcpProtocolHandlerInner>>,
}
impl RawTcpProtocolHandler {
fn new_inner(local_address: SocketAddr) -> RawTcpProtocolHandlerInner {
RawTcpProtocolHandlerInner { local_address }
}
pub fn new(config: VeilidConfig, local_address: SocketAddr) -> Self {
pub fn new(config: VeilidConfig) -> Self {
let c = config.get();
let connection_initial_timeout_ms = c.network.connection_initial_timeout_ms;
Self {
connection_initial_timeout_ms,
inner: Arc::new(Mutex::new(Self::new_inner(local_address))),
}
}
@@ -131,6 +121,7 @@ impl RawTcpProtocolHandler {
self,
ps: AsyncPeekStream,
socket_addr: SocketAddr,
local_addr: SocketAddr,
) -> io::Result<Option<ProtocolNetworkConnection>> {
log_net!("TCP: on_accept_async: enter");
let mut peekbuf: [u8; PEEK_DETECT_LEN] = [0u8; PEEK_DETECT_LEN];
@@ -147,9 +138,8 @@ impl RawTcpProtocolHandler {
SocketAddress::from_socket_addr(socket_addr),
ProtocolType::TCP,
);
let local_address = self.inner.lock().local_address;
let conn = ProtocolNetworkConnection::RawTcp(RawTcpNetworkConnection::new(
ConnectionDescriptor::new(peer_addr, SocketAddress::from_socket_addr(local_address)),
ConnectionDescriptor::new(peer_addr, SocketAddress::from_socket_addr(local_addr)),
ps,
));
@@ -158,7 +148,7 @@ impl RawTcpProtocolHandler {
Ok(Some(conn))
}
#[instrument(level = "trace", err)]
#[instrument(level = "trace", ret, err)]
pub async fn connect(
local_address: Option<SocketAddr>,
socket_addr: SocketAddr,
@@ -202,7 +192,8 @@ impl ProtocolAcceptHandler for RawTcpProtocolHandler {
&self,
stream: AsyncPeekStream,
peer_addr: SocketAddr,
local_addr: SocketAddr,
) -> SendPinBoxFuture<io::Result<Option<ProtocolNetworkConnection>>> {
Box::pin(self.clone().on_accept_async(stream, peer_addr))
Box::pin(self.clone().on_accept_async(stream, peer_addr, local_addr))
}
}
@@ -129,7 +129,6 @@ where
///
struct WebsocketProtocolHandlerArc {
tls: bool,
local_address: SocketAddr,
request_path: Vec<u8>,
connection_initial_timeout_ms: u32,
}
@@ -142,7 +141,7 @@ where
arc: Arc<WebsocketProtocolHandlerArc>,
}
impl WebsocketProtocolHandler {
pub fn new(config: VeilidConfig, tls: bool, local_address: SocketAddr) -> Self {
pub fn new(config: VeilidConfig, tls: bool) -> Self {
let c = config.get();
let path = if tls {
format!("GET /{}", c.network.protocol.ws.path.trim_end_matches('/'))
@@ -158,7 +157,6 @@ impl WebsocketProtocolHandler {
Self {
arc: Arc::new(WebsocketProtocolHandlerArc {
tls,
local_address,
request_path: path.as_bytes().to_vec(),
connection_initial_timeout_ms,
}),
@@ -170,6 +168,7 @@ impl WebsocketProtocolHandler {
self,
ps: AsyncPeekStream,
socket_addr: SocketAddr,
local_addr: SocketAddr,
) -> io::Result<Option<ProtocolNetworkConnection>> {
log_net!("WS: on_accept_async: enter");
let request_path_len = self.arc.request_path.len() + 2;
@@ -209,10 +208,7 @@ impl WebsocketProtocolHandler {
PeerAddress::new(SocketAddress::from_socket_addr(socket_addr), protocol_type);
let conn = ProtocolNetworkConnection::WsAccepted(WebsocketNetworkConnection::new(
ConnectionDescriptor::new(
peer_addr,
SocketAddress::from_socket_addr(self.arc.local_address),
),
ConnectionDescriptor::new(peer_addr, SocketAddress::from_socket_addr(local_addr)),
ws_stream,
));
@@ -221,7 +217,7 @@ impl WebsocketProtocolHandler {
Ok(Some(conn))
}
#[instrument(level = "trace", err)]
#[instrument(level = "trace", ret, err)]
pub async fn connect(
local_address: Option<SocketAddr>,
dial_info: &DialInfo,
@@ -296,7 +292,8 @@ impl ProtocolAcceptHandler for WebsocketProtocolHandler {
&self,
stream: AsyncPeekStream,
peer_addr: SocketAddr,
local_addr: SocketAddr,
) -> SendPinBoxFuture<io::Result<Option<ProtocolNetworkConnection>>> {
Box::pin(self.clone().on_accept_async(stream, peer_addr))
Box::pin(self.clone().on_accept_async(stream, peer_addr, local_addr))
}
}
@@ -387,7 +387,7 @@ impl Network {
ip_addrs,
ws_port,
false,
Box::new(|c, t, a| Box::new(WebsocketProtocolHandler::new(c, t, a))),
Box::new(|c, t| Box::new(WebsocketProtocolHandler::new(c, t))),
)
.await?;
trace!("WS: listener started on {:#?}", socket_addresses);
@@ -496,7 +496,7 @@ impl Network {
ip_addrs,
wss_port,
true,
Box::new(|c, t, a| Box::new(WebsocketProtocolHandler::new(c, t, a))),
Box::new(|c, t| Box::new(WebsocketProtocolHandler::new(c, t))),
)
.await?;
trace!("WSS: listener started on {:#?}", socket_addresses);
@@ -590,7 +590,7 @@ impl Network {
ip_addrs,
tcp_port,
false,
Box::new(move |c, _, a| Box::new(RawTcpProtocolHandler::new(c, a))),
Box::new(move |c, _| Box::new(RawTcpProtocolHandler::new(c))),
)
.await?;
trace!("TCP: listener started on {:#?}", socket_addresses);
@@ -16,6 +16,7 @@ cfg_if::cfg_if! {
&self,
stream: AsyncPeekStream,
peer_addr: SocketAddr,
local_addr: SocketAddr,
) -> SendPinBoxFuture<io::Result<Option<ProtocolNetworkConnection>>>;
}
@@ -38,7 +39,7 @@ cfg_if::cfg_if! {
}
pub type NewProtocolAcceptHandler =
dyn Fn(VeilidConfig, bool, SocketAddr) -> Box<dyn ProtocolAcceptHandler> + Send;
dyn Fn(VeilidConfig, bool) -> Box<dyn ProtocolAcceptHandler> + Send;
}
}
///////////////////////////////////////////////////////////
@@ -91,7 +92,7 @@ pub struct NetworkConnection {
processor: Option<MustJoinHandle<()>>,
established_time: u64,
stats: Arc<Mutex<NetworkConnectionStats>>,
sender: flume::Sender<Vec<u8>>,
sender: flume::Sender<(Option<Id>, Vec<u8>)>,
stop_source: Option<StopSource>,
}
@@ -120,9 +121,6 @@ impl NetworkConnection {
protocol_connection: ProtocolNetworkConnection,
connection_id: NetworkConnectionId,
) -> Self {
// Get timeout
let network_manager = connection_manager.network_manager();
// Get descriptor
let descriptor = protocol_connection.descriptor();
@@ -181,6 +179,7 @@ impl NetworkConnection {
}
}
#[instrument(level="trace", skip(message, stats), fields(message.len = message.len()), ret, err)]
async fn send_internal(
protocol_connection: &ProtocolNetworkConnection,
stats: Arc<Mutex<NetworkConnectionStats>>,
@@ -194,6 +193,8 @@ impl NetworkConnection {
Ok(NetworkResult::Value(out))
}
#[instrument(level="trace", skip(stats), fields(ret.len), err)]
async fn recv_internal(
protocol_connection: &ProtocolNetworkConnection,
stats: Arc<Mutex<NetworkConnectionStats>>,
@@ -204,6 +205,8 @@ impl NetworkConnection {
let mut stats = stats.lock();
stats.last_message_recv_time.max_assign(Some(ts));
tracing::Span::current().record("ret.len", out.len());
Ok(NetworkResult::Value(out))
}
@@ -223,7 +226,7 @@ impl NetworkConnection {
manager_stop_token: StopToken,
connection_id: NetworkConnectionId,
descriptor: ConnectionDescriptor,
receiver: flume::Receiver<Vec<u8>>,
receiver: flume::Receiver<(Option<Id>, Vec<u8>)>,
protocol_connection: ProtocolNetworkConnection,
stats: Arc<Mutex<NetworkConnectionStats>>,
) -> SendPinBoxFuture<()> {
@@ -249,7 +252,7 @@ impl NetworkConnection {
};
let timer = MutableFuture::new(new_timer());
unord.push(system_boxed(timer.clone()));
unord.push(system_boxed(timer.clone().instrument(Span::current())));
loop {
// Add another message sender future if necessary
@@ -257,13 +260,17 @@ impl NetworkConnection {
need_sender = false;
let sender_fut = receiver.recv_async().then(|res| async {
match res {
Ok(message) => {
Ok((span_id, message)) => {
let recv_span = span!(parent: None, Level::TRACE, "process_connection recv");
recv_span.follows_from(span_id);
// send the packet
if let Err(e) = Self::send_internal(
&protocol_connection,
stats.clone(),
message,
)
).instrument(recv_span)
.await
{
// Sending the packet along can fail, if so, this connection is dead
@@ -280,7 +287,7 @@ impl NetworkConnection {
}
}
});
unord.push(system_boxed(sender_fut));
unord.push(system_boxed(sender_fut.instrument(Span::current())));
}
// Add another message receiver future if necessary
@@ -314,7 +321,7 @@ impl NetworkConnection {
}
});
unord.push(system_boxed(receiver_fut));
unord.push(system_boxed(receiver_fut.instrument(Span::current())));
}
// Process futures
@@ -358,7 +365,7 @@ impl NetworkConnection {
connection_manager
.report_connection_finished(connection_id)
.await;
})
}.instrument(trace_span!("process_connection")))
}
}
+139 -113
View File
@@ -39,119 +39,125 @@ impl NetworkManager {
// Get bootstrap nodes from hostnames concurrently
let mut unord = FuturesUnordered::new();
for bsname in bsnames {
unord.push(async move {
// look up boostrap node txt records
let bsnirecords = match intf::txt_lookup(&bsname).await {
Err(e) => {
warn!("bootstrap node txt lookup failed for {}: {}", bsname, e);
return None;
}
Ok(v) => v,
};
// for each record resolve into key/bootstraprecord pairs
let mut bootstrap_records: Vec<(DHTKey, BootstrapRecord)> = Vec::new();
for bsnirecord in bsnirecords {
// Bootstrap TXT Record Format Version 0:
// txt_version,min_version,max_version,nodeid,hostname,dialinfoshort*
//
// Split bootstrap node record by commas. Example:
// 0,0,0,7lxDEabK_qgjbe38RtBa3IZLrud84P6NhGP-pRTZzdQ,bootstrap-1.dev.veilid.net,T5150,U5150,W5150/ws
let records: Vec<String> = bsnirecord
.trim()
.split(',')
.map(|x| x.trim().to_owned())
.collect();
if records.len() < 6 {
warn!("invalid number of fields in bootstrap txt record");
continue;
}
// Bootstrap TXT record version
let txt_version: u8 = match records[0].parse::<u8>() {
Ok(v) => v,
unord.push(
async move {
// look up boostrap node txt records
let bsnirecords = match intf::txt_lookup(&bsname).await {
Err(e) => {
warn!(
warn!("bootstrap node txt lookup failed for {}: {}", bsname, e);
return None;
}
Ok(v) => v,
};
// for each record resolve into key/bootstraprecord pairs
let mut bootstrap_records: Vec<(DHTKey, BootstrapRecord)> = Vec::new();
for bsnirecord in bsnirecords {
// Bootstrap TXT Record Format Version 0:
// txt_version,min_version,max_version,nodeid,hostname,dialinfoshort*
//
// Split bootstrap node record by commas. Example:
// 0,0,0,7lxDEabK_qgjbe38RtBa3IZLrud84P6NhGP-pRTZzdQ,bootstrap-1.dev.veilid.net,T5150,U5150,W5150/ws
let records: Vec<String> = bsnirecord
.trim()
.split(',')
.map(|x| x.trim().to_owned())
.collect();
if records.len() < 6 {
warn!("invalid number of fields in bootstrap txt record");
continue;
}
// Bootstrap TXT record version
let txt_version: u8 = match records[0].parse::<u8>() {
Ok(v) => v,
Err(e) => {
warn!(
"invalid txt_version specified in bootstrap node txt record: {}",
e
);
continue;
}
};
if txt_version != BOOTSTRAP_TXT_VERSION {
warn!("unsupported bootstrap txt record version");
continue;
}
};
if txt_version != BOOTSTRAP_TXT_VERSION {
warn!("unsupported bootstrap txt record version");
continue;
}
// Min/Max wire protocol version
let min_version: u8 = match records[1].parse::<u8>() {
Ok(v) => v,
Err(e) => {
warn!(
// Min/Max wire protocol version
let min_version: u8 = match records[1].parse::<u8>() {
Ok(v) => v,
Err(e) => {
warn!(
"invalid min_version specified in bootstrap node txt record: {}",
e
);
continue;
}
};
let max_version: u8 = match records[2].parse::<u8>() {
Ok(v) => v,
Err(e) => {
warn!(
continue;
}
};
let max_version: u8 = match records[2].parse::<u8>() {
Ok(v) => v,
Err(e) => {
warn!(
"invalid max_version specified in bootstrap node txt record: {}",
e
);
continue;
}
};
// Node Id
let node_id_str = &records[3];
let node_id_key = match DHTKey::try_decode(node_id_str) {
Ok(v) => v,
Err(e) => {
warn!(
"Invalid node id in bootstrap node record {}: {}",
node_id_str, e
);
continue;
}
};
// Hostname
let hostname_str = &records[4];
// If this is our own node id, then we skip it for bootstrap, in case we are a bootstrap node
if self.routing_table().node_id() == node_id_key {
continue;
}
// Resolve each record and store in node dial infos list
let mut bootstrap_record = BootstrapRecord {
min_version,
max_version,
dial_info_details: Vec::new(),
};
for rec in &records[5..] {
let rec = rec.trim();
let dial_infos = match DialInfo::try_vec_from_short(rec, hostname_str) {
Ok(dis) => dis,
Err(e) => {
warn!("Couldn't resolve bootstrap node dial info {}: {}", rec, e);
continue;
}
};
for di in dial_infos {
bootstrap_record.dial_info_details.push(DialInfoDetail {
dial_info: di,
class: DialInfoClass::Direct,
});
// Node Id
let node_id_str = &records[3];
let node_id_key = match DHTKey::try_decode(node_id_str) {
Ok(v) => v,
Err(e) => {
warn!(
"Invalid node id in bootstrap node record {}: {}",
node_id_str, e
);
continue;
}
};
// Hostname
let hostname_str = &records[4];
// If this is our own node id, then we skip it for bootstrap, in case we are a bootstrap node
if self.routing_table().node_id() == node_id_key {
continue;
}
// Resolve each record and store in node dial infos list
let mut bootstrap_record = BootstrapRecord {
min_version,
max_version,
dial_info_details: Vec::new(),
};
for rec in &records[5..] {
let rec = rec.trim();
let dial_infos = match DialInfo::try_vec_from_short(rec, hostname_str) {
Ok(dis) => dis,
Err(e) => {
warn!(
"Couldn't resolve bootstrap node dial info {}: {}",
rec, e
);
continue;
}
};
for di in dial_infos {
bootstrap_record.dial_info_details.push(DialInfoDetail {
dial_info: di,
class: DialInfoClass::Direct,
});
}
}
bootstrap_records.push((node_id_key, bootstrap_record));
}
bootstrap_records.push((node_id_key, bootstrap_record));
Some(bootstrap_records)
}
Some(bootstrap_records)
});
.instrument(Span::current()),
);
}
let mut bsmap = BootstrapRecordMap::new();
@@ -172,6 +178,7 @@ impl NetworkManager {
}
// 'direct' bootstrap task routine for systems incapable of resolving TXT records, such as browser WASM
#[instrument(level = "trace", skip(self), err)]
pub(super) async fn direct_bootstrap_task_routine(
self,
stop_token: StopToken,
@@ -201,7 +208,8 @@ impl NetworkManager {
let routing_table = routing_table.clone();
unord.push(
// lets ask bootstrap to find ourselves now
async move { routing_table.reverse_find_node(nr, true).await },
async move { routing_table.reverse_find_node(nr, true).await }
.instrument(Span::current()),
);
}
}
@@ -300,23 +308,26 @@ impl NetworkManager {
) {
// Add this our futures to process in parallel
let routing_table = routing_table.clone();
unord.push(async move {
// Need VALID signed peer info, so ask bootstrap to find_node of itself
// which will ensure it has the bootstrap's signed peer info as part of the response
let _ = routing_table.find_target(nr.clone()).await;
unord.push(
async move {
// Need VALID signed peer info, so ask bootstrap to find_node of itself
// which will ensure it has the bootstrap's signed peer info as part of the response
let _ = routing_table.find_target(nr.clone()).await;
// Ensure we got the signed peer info
if !nr.signed_node_info_has_valid_signature(RoutingDomain::PublicInternet) {
log_net!(warn
"bootstrap at {:?} did not return valid signed node info",
nr
);
// If this node info is invalid, it will time out after being unpingable
} else {
// otherwise this bootstrap is valid, lets ask it to find ourselves now
routing_table.reverse_find_node(nr, true).await
// Ensure we got the signed peer info
if !nr.signed_node_info_has_valid_signature(RoutingDomain::PublicInternet) {
log_net!(warn
"bootstrap at {:?} did not return valid signed node info",
nr
);
// If this node info is invalid, it will time out after being unpingable
} else {
// otherwise this bootstrap is valid, lets ask it to find ourselves now
routing_table.reverse_find_node(nr, true).await
}
}
});
.instrument(Span::current()),
);
}
}
@@ -382,7 +393,11 @@ impl NetworkManager {
let nr_filtered =
nr.filtered_clone(NodeRefFilter::new().with_dial_info_filter(dif));
log_net!("--> Keepalive ping to {:?}", nr_filtered);
unord.push(async move { rpc.rpc_call_status(nr_filtered).await }.boxed());
unord.push(
async move { rpc.rpc_call_status(nr_filtered).await }
.instrument(Span::current())
.boxed(),
);
did_pings = true;
}
}
@@ -392,7 +407,11 @@ impl NetworkManager {
// any mapped ports to preserve
if !did_pings {
let rpc = rpc.clone();
unord.push(async move { rpc.rpc_call_status(nr).await }.boxed());
unord.push(
async move { rpc.rpc_call_status(nr).await }
.instrument(Span::current())
.boxed(),
);
}
}
@@ -420,7 +439,11 @@ impl NetworkManager {
let rpc = rpc.clone();
// Just do a single ping with the best protocol for all the nodes
unord.push(async move { rpc.rpc_call_status(nr).await }.boxed());
unord.push(
async move { rpc.rpc_call_status(nr).await }
.instrument(Span::current())
.boxed(),
);
}
Ok(())
@@ -479,7 +502,10 @@ impl NetworkManager {
);
for nr in noderefs {
let routing_table = routing_table.clone();
ord.push_back(async move { routing_table.reverse_find_node(nr, false).await });
ord.push_back(
async move { routing_table.reverse_find_node(nr, false).await }
.instrument(Span::current()),
);
}
// do peer minimum search in order from fastest to slowest
@@ -106,7 +106,7 @@ impl WebsocketNetworkConnection {
pub struct WebsocketProtocolHandler {}
impl WebsocketProtocolHandler {
#[instrument(level = "trace", err)]
#[instrument(level = "trace", ret, err)]
pub async fn connect(
dial_info: &DialInfo,
timeout_ms: u32,