Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: increment and refactor number of hops for routed messages #12188

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion chain/network/src/network_protocol/borsh_conv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ impl TryFrom<&net::PeerMessage> for mem::PeerMessage {
net::PeerMessage::Routed(r) => mem::PeerMessage::Routed(Box::new(RoutedMessageV2 {
msg: *r,
created_at: None,
num_hops: Some(0),
num_hops: 0,
})),
net::PeerMessage::Disconnect => mem::PeerMessage::Disconnect(mem::Disconnect {
// This flag is used by the disconnecting peer to advise the other peer that there
Expand Down
4 changes: 2 additions & 2 deletions chain/network/src/network_protocol/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -686,7 +686,7 @@ pub struct RoutedMessageV2 {
pub created_at: Option<time::Utc>,
/// Number of peers this routed message travelled through.
/// Doesn't include the peers that are the source and the destination of the message.
pub num_hops: Option<i32>,
pub num_hops: u32,
}

impl std::ops::Deref for RoutedMessageV2 {
Expand Down Expand Up @@ -928,7 +928,7 @@ impl RawRoutedMessage {
body: self.body,
},
created_at: now,
num_hops: Some(0),
num_hops: 0,
}
}
}
25 changes: 14 additions & 11 deletions chain/network/src/network_protocol/network.proto
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import "google/protobuf/timestamp.proto";
message OwnedAccount {
PublicKey account_key = 1; // required
// PeerId of the node owning the account_key.
PublicKey peer_id = 2; // required
PublicKey peer_id = 2; // required
// Timestamp indicates the date of signing - we do not assume the
// nodes' clocks to be synchronized, but for security if the timestamp
// deviation is too large, the handshake will be rejected.
Expand Down Expand Up @@ -158,7 +158,7 @@ message Handshake {
uint32 sender_listen_port = 5;
// Basic info about the NEAR chain that the sender belongs to.
// Sender expects receiver to belong to the same chain.
// In case of mismatch, receiver sends back HandshakeFailure with
// In case of mismatch, receiver sends back HandshakeFailure with
// reason GenesisMismatch.
PeerChainInfo sender_chain_info = 6;
// Edge (sender,receiver) signed by sender, which once signed by
Expand Down Expand Up @@ -227,7 +227,7 @@ message AccountData {
// to a specific peer_id. Then this field won't be necessary.
// Unless we use it instead of AnnounceAccount.
PublicKey peer_id = 5; // required.

PublicKey account_key = 6; // required.

// List of nodes which
Expand All @@ -244,7 +244,7 @@ message AccountData {
uint64 version = 7;
// Time of creation of this AccountData.
// TODO(gprusak): consider expiring the AccountData based on this field.
google.protobuf.Timestamp timestamp = 4;
google.protobuf.Timestamp timestamp = 4;
}

// Message sent whenever the sender learns about new connections
Expand All @@ -261,7 +261,7 @@ message AccountData {
message RoutingTableUpdate {
reserved 3,4;
repeated Edge edges = 1;
// list of known NEAR validator accounts
// list of known NEAR validator accounts
repeated AnnounceAccount accounts = 2;
}

Expand Down Expand Up @@ -373,11 +373,14 @@ message SignedTransaction {
// Wrapper of borsh-encoded RoutedMessage
// https://github.com/near/nearcore/blob/1a4edefd0116f7d1e222bc96569367a02fe64199/chain/network-primitives/src/network_protocol/mod.rs#L295
message RoutedMessage {
// Deprecated
reserved 3;

bytes borsh = 1;
// Timestamp of creating the Routed message by its original author.
google.protobuf.Timestamp created_at = 2;
// Number of peers this routed message travelled through. Doesn't include the peer that created the message.
optional int32 num_hops = 3;
uint32 num_hops = 4;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just want to confirm, are you completely sure that deprecating this field and reusing the field name is OK?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be ok:

  • reusing the field name is fine as the protobuf encoding is based on the tag numbers
  • there could be some data loss with deprecating the old field (for example if a node that uses the old version sends it to one that uses the new one) but the field was not used before anyway

There also is a protobuf backwards compatibility test which passed, but I do not know how it works

}

// Disconnect is send by a node before closing a TCP connection.
Expand Down Expand Up @@ -430,7 +433,7 @@ message StateResponse {
}

message SnapshotHostInfo {
PublicKey peer_id = 1;
PublicKey peer_id = 1;
CryptoHash sync_hash = 2;
uint64 epoch_height = 3;
repeated uint64 shards = 4;
Expand Down Expand Up @@ -472,21 +475,21 @@ message PeerMessage {
LastEdge last_edge = 6;
RoutingTableUpdate sync_routing_table = 7;
DistanceVector distance_vector = 28;

UpdateNonceRequest update_nonce_request = 8;
UpdateNonceResponse update_nonce_response = 9;

SyncAccountsData sync_accounts_data = 25;

PeersRequest peers_request = 10;
PeersResponse peers_response = 11;

BlockHeadersRequest block_headers_request = 12;
BlockHeadersResponse block_headers_response = 13;

BlockRequest block_request = 14;
BlockResponse block_response = 15;

SignedTransaction transaction = 16;
RoutedMessage routed = 17;
Disconnect disconnect = 18;
Expand Down
1 change: 1 addition & 0 deletions chain/network/src/peer/peer_actor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1431,6 +1431,7 @@ impl PeerActor {
}
} else {
if msg.decrease_ttl() {
msg.num_hops += 1;
self.network_state.send_message_to_peer(&self.clock, conn.tier, msg);
} else {
#[cfg(test)]
Expand Down
5 changes: 3 additions & 2 deletions chain/network/src/peer_manager/tests/routing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -879,9 +879,9 @@ async fn max_num_peers_limit() {
drop(pm3);
}

/// Test that TTL is handled properly.
/// Test that TTL and number of hops are handled properly.
#[tokio::test]
async fn ttl() {
async fn ttl_and_num_hops() {
abort_on_panic();
let mut rng = make_rng(921853233);
let rng = &mut rng;
Expand Down Expand Up @@ -931,6 +931,7 @@ async fn ttl() {
.await;
assert_eq!(msg.body, got.body);
assert_eq!(msg.ttl - 1, got.ttl);
assert_eq!(msg.num_hops + 1, got.num_hops);
}
}
}
Expand Down
14 changes: 5 additions & 9 deletions chain/network/src/stats/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -434,17 +434,13 @@ fn record_routed_msg_latency(
// The routed message reached its destination. If the number of hops is known, then update the
// corresponding metric.
fn record_routed_msg_hops(msg: &RoutedMessageV2) {
const MAX_NUM_HOPS: i32 = 20;
const MAX_NUM_HOPS: u32 = 20;
// We assume that the number of hops is small.
// As long as the number of hops is below 10, this metric will not consume too much memory.
if let Some(num_hops) = msg.num_hops {
if num_hops >= 0 {
let num_hops = if num_hops > MAX_NUM_HOPS { MAX_NUM_HOPS } else { num_hops };
NETWORK_ROUTED_MSG_NUM_HOPS
.with_label_values(&[msg.body_variant(), &num_hops.to_string()])
.inc();
}
}
let num_hops = std::cmp::min(MAX_NUM_HOPS, msg.num_hops);
NETWORK_ROUTED_MSG_NUM_HOPS
.with_label_values(&[msg.body_variant(), &num_hops.to_string()])
.inc();
}

#[derive(Clone, Copy, strum::AsRefStr)]
Expand Down
Loading