ethereum, P2P
P2P Server
Node will start make the P2P Server and start it:
github.com/ethereum/go-ethereum/p2p.(*Server).Start at server.go:438
github.com/ethereum/go-ethereum/node.(*Node).Start at node.go:220
github.com/ethereum/go-ethereum/cmd/utils.StartNode at cmd.go:67
main.startNode at main.go:331
main.geth at main.go:308
gopkg.in/urfave/cli%2ev1.HandleAction at app.go:490
gopkg.in/urfave/cli%2ev1.(*App).Run at app.go:264
main.main at main.go:248
runtime.main at proc.go:203
runtime.goexit at asm_amd64.s:1357
- Async stack trace
runtime.rt0_go at asm_amd64.s:220
Start
// Start starts running the server.
// Servers can not be re-used after stopping.
func (srv *Server) Start() (err error) {
srv.lock.Lock()
defer srv.lock.Unlock()
if srv.running {
return errors.New("server already running")
}
srv.running = true
srv.log = srv.Config.Logger
if srv.log == nil {
srv.log = log.Root()
}
if srv.clock == nil {
srv.clock = mclock.System{}
}
if srv.NoDial && srv.ListenAddr == "" {
srv.log.Warn("P2P server will be useless, neither dialing nor listening")
}
// static fields
if srv.PrivateKey == nil {
return errors.New("Server.PrivateKey must be set to a non-nil key")
}
if srv.newTransport == nil {
srv.newTransport = newRLPX
}
if srv.listenFunc == nil {
srv.listenFunc = net.Listen
}
srv.quit = make(chan struct{})
srv.delpeer = make(chan peerDrop)
srv.checkpointPostHandshake = make(chan *conn)
srv.checkpointAddPeer = make(chan *conn)
srv.addtrusted = make(chan *enode.Node)
srv.removetrusted = make(chan *enode.Node)
srv.peerOp = make(chan peerOpFunc)
srv.peerOpDone = make(chan struct{})
if err := srv.setupLocalNode(); err != nil {
return err
}
if srv.ListenAddr != "" {
if err := srv.setupListening(); err != nil {
return err
}
}
if err := srv.setupDiscovery(); err != nil {
return err
}
srv.setupDialScheduler()
srv.loopWG.Add(1)
go srv.run()
return nil
}
Some go chan will be created:
- quit, for stoppting
- delpeer, for peer disconnected
- checkpointPostHandshake, connection has passed the encryption handshake but not verified yet
- checkpointAddPeer, peer verified, then launch
- addtrusted: AddTrustedPeer to add a node to the trusted node set
- removetrusted: RemoveTrustedPeer from node set
- peerOp, used by Peers and PeerCount
- peerOpDone, used by Peers and PeerCount
Components
- Setup LocalNode
- Setup Listening TCP
- Setup Discovery
- KAD
- V5
- Setup DialScheduler
- Main Loop
Main Loop
Main loop handles messages received from go chan, to manage running peers.
--> Add/Remove TrustedPeer
--> A
// run is the main loop of the server.
func (srv *Server) run() {
...
for {
select {
case <-srv.quit:
// The server was stopped. Run the cleanup logic.
break running
case n := <-srv.addtrusted:
// This channel is used by AddTrustedPeer to add a node
// to the trusted node set.
srv.log.Trace("Adding trusted node", "node", n)
trusted[n.ID()] = true
if p, ok := peers[n.ID()]; ok {
p.rw.set(trustedConn, true)
}
case n := <-srv.removetrusted:
// This channel is used by RemoveTrustedPeer to remove a node
// from the trusted node set.
srv.log.Trace("Removing trusted node", "node", n)
delete(trusted, n.ID())
if p, ok := peers[n.ID()]; ok {
p.rw.set(trustedConn, false)
}
case op := <-srv.peerOp:
// This channel is used by Peers and PeerCount.
op(peers)
srv.peerOpDone <- struct{}{}
case c := <-srv.checkpointPostHandshake:
// A connection has passed the encryption handshake so
// the remote identity is known (but hasn't been verified yet).
if trusted[c.node.ID()] {
// Ensure that the trusted flag is set before checking against MaxPeers.
c.flags |= trustedConn
}
// TODO: track in-progress inbound node IDs (pre-Peer) to avoid dialing them.
c.cont <- srv.postHandshakeChecks(peers, inboundCount, c)
case c := <-srv.checkpointAddPeer:
// At this point the connection is past the protocol handshake.
// Its capabilities are known and the remote identity is verified.
err := srv.addPeerChecks(peers, inboundCount, c)
if err == nil {
// The handshakes are done and it passed all checks.
p := srv.launchPeer(c)
peers[c.node.ID()] = p
srv.log.Debug("Adding p2p peer", "peercount", len(peers), "id", p.ID(), "conn", c.flags, "addr", p.RemoteAddr(), "name", truncateName(c.name))
srv.dialsched.peerAdded(c)
if p.Inbound() {
inboundCount++
}
}
c.cont <- err
case pd := <-srv.delpeer:
// A peer disconnected.
d := common.PrettyDuration(mclock.Now() - pd.created)
delete(peers, pd.ID())
srv.log.Debug("Removing p2p peer", "peercount", len(peers), "id", pd.ID(), "duration", d, "req", pd.requested, "err", pd.err)
srv.dialsched.peerRemoved(pd.rw)
if pd.Inbound() {
inboundCount--
}
}
}
...
}
Launch Peer
In Main loop, launchPeer() will be called when a new connection arrived and all checks passed.
The peer will be created from the connection and added to peer set as well as dial scheduler.
case c := <-srv.checkpointAddPeer:
// At this point the connection is past the protocol handshake.
// Its capabilities are known and the remote identity is verified.
err := srv.addPeerChecks(peers, inboundCount, c)
if err == nil {
// The handshakes are done and it passed all checks.
p := srv.launchPeer(c)
peers[c.node.ID()] = p
srv.log.Debug("Adding p2p peer", "peercount", len(peers), "id", p.ID(), "conn", c.flags, "addr", p.RemoteAddr(), "name", truncateName(c.name))
srv.dialsched.peerAdded(c)
if p.Inbound() {
inboundCount++
}
}
c.cont <- err
////////////////////////////////////////////////////////////////////////
func (srv *Server) launchPeer(c *conn) *Peer {
p := newPeer(srv.log, c, srv.Protocols)
if srv.EnableMsgEvents {
// If message events are enabled, pass the peerFeed
// to the peer.
p.events = &srv.peerFeed
}
go srv.runPeer(p)
return p
}
// runPeer runs in its own goroutine for each peer.
func (srv *Server) runPeer(p *Peer) {
if srv.newPeerHook != nil {
srv.newPeerHook(p)
}
srv.peerFeed.Send(&PeerEvent{
Type: PeerEventTypeAdd,
Peer: p.ID(),
RemoteAddress: p.RemoteAddr().String(),
LocalAddress: p.LocalAddr().String(),
})
// Run the per-peer main loop.
remoteRequested, err := p.run()
// Announce disconnect on the main loop to update the peer set.
// The main loop waits for existing peers to be sent on srv.delpeer
// before returning, so this send should not select on srv.quit.
srv.delpeer <- peerDrop{p, err, remoteRequested}
// Broadcast peer drop to external subscribers. This needs to be
// after the send to delpeer so subscribers have a consistent view of
// the peer set (i.e. Server.Peers() doesn't include the peer when the
// event is received.
srv.peerFeed.Send(&PeerEvent{
Type: PeerEventTypeDrop,
Peer: p.ID(),
Error: err.Error(),
RemoteAddress: p.RemoteAddr().String(),
LocalAddress: p.LocalAddr().String(),
})
}
func (p *Peer) run() (remoteRequested bool, err error) {
var (
writeStart = make(chan struct{}, 1)
writeErr = make(chan error, 1)
readErr = make(chan error, 1)
reason DiscReason // sent to the peer
)
p.wg.Add(2)
go p.readLoop(readErr)
go p.pingLoop()
// Start all protocol handlers.
writeStart <- struct{}{}
p.startProtocols(writeStart, writeErr)
// Wait for an error or disconnect.
loop:
for {
select {
case err = <-writeErr:
// A write finished. Allow the next write to start if
// there was no error.
if err != nil {
reason = DiscNetworkError
break loop
}
writeStart <- struct{}{}
case err = <-readErr:
if r, ok := err.(DiscReason); ok {
remoteRequested = true
reason = r
} else {
reason = DiscNetworkError
}
break loop
case err = <-p.protoErr:
reason = discReasonForError(err)
break loop
case err = <-p.disc:
reason = discReasonForError(err)
break loop
}
}
close(p.closed)
p.rw.close(reason)
p.wg.Wait()
return remoteRequested, err
}
A few go routines will started to run the peer:
- readLoop, read messages from peer, then handle them
- pingLoop, send pingMsg periodically
- ProtocolManager for upper layer protocol -> handleMessage
- a loop for error or disconection
Below is the main prodecure of protocol manager:
Run: func(p *p2p.Peer, rw p2p.MsgReadWriter) error {
peer := pm.newPeer(int(version), p, rw, pm.txpool.Get)
select {
case pm.newPeerCh <- peer:
pm.wg.Add(1)
defer pm.wg.Done()
return pm.handle(peer)
case <-pm.quitSync:
return p2p.DiscQuitting
}
},
An eth peer will be created from p2p.peer, and passed to newPeerCh of ProtocolManager.
pm.handle(peer) will take over the peer and process eth protocol.
Each p2p.peer will set up a ProtoRW which acts a bridge between P2P readLoop and ProtocolManager for sending/receiving eth messages out/from.
TCP listening
It accepts inbound connection, which is handled by a goroutine ‘go SetupConn’. The first thing is to run handshake with the peer endpoint. Then a message will be sent to mainloop checkpointPostHandshake for validity check. After successfuly check, main loop can send back a message to allow further connection establishment. In the end, a message will be passed to checkpointAddPeer in main loop to indicate a new peer.
// SetupConn runs the handshakes and attempts to add the connection
// as a peer. It returns when the connection has been added as a peer
// or the handshakes have failed.
func (srv *Server) setupConn(c *conn, flags connFlag, dialDest *enode.Node) error {
// If dialing, figure out the remote public key.
var dialPubkey *ecdsa.PublicKey
if dialDest != nil {
dialPubkey = new(ecdsa.PublicKey)
if err := dialDest.Load((*enode.Secp256k1)(dialPubkey)); err != nil {
err = errors.New("dial destination doesn't have a secp256k1 public key")
srv.log.Trace("Setting up connection failed", "addr", c.fd.RemoteAddr(), "conn", c.flags, "err", err)
return err
}
}
// Run the RLPx handshake.
remotePubkey, err := c.doEncHandshake(srv.PrivateKey, dialPubkey)
if err != nil {
srv.log.Trace("Failed RLPx handshake", "addr", c.fd.RemoteAddr(), "conn", c.flags, "err", err)
return err
}
if dialDest != nil {
// For dialed connections, check that the remote public key matches.
if dialPubkey.X.Cmp(remotePubkey.X) != 0 || dialPubkey.Y.Cmp(remotePubkey.Y) != 0 {
return DiscUnexpectedIdentity
}
c.node = dialDest
} else {
c.node = nodeFromConn(remotePubkey, c.fd)
}
clog := srv.log.New("id", c.node.ID(), "addr", c.fd.RemoteAddr(), "conn", c.flags)
err = srv.checkpoint(c, srv.checkpointPostHandshake)
if err != nil {
clog.Trace("Rejected peer", "err", err)
return err
}
// Run the capability negotiation handshake.
phs, err := c.doProtoHandshake(srv.ourHandshake)
if err != nil {
clog.Trace("Failed p2p handshake", "err", err)
return err
}
if id := c.node.ID(); !bytes.Equal(crypto.Keccak256(phs.ID), id[:]) {
clog.Trace("Wrong devp2p handshake identity", "phsid", hex.EncodeToString(phs.ID))
return DiscUnexpectedIdentity
}
c.caps, c.name = phs.Caps, phs.Name
err = srv.checkpoint(c, srv.checkpointAddPeer)
if err != nil {
clog.Trace("Rejected peer", "err", err)
return err
}
return nil
}
Note: when setting up connection, RPLX will be created for the tranport of conn.
RPLX implements MsgReader/Writer interfaces for sending/receiving RLP encoded messages.
func newRLPX(fd net.Conn) transport {
fd.SetDeadline(time.Now().Add(handshakeTimeout))
return &rlpx{fd: fd}
}
Discovery
Ii will setup the FairMix iterator for Dial Scheudler, then start KAD or v5 discovery, or both.
KAD
- KAD table routine for KAD refreshing
- loop: keeps track of the refresh timer and the pending reply queue
- readLoop: handle incoming UDP packets
KAD is used to find more nodes by running KAD lookup…
func ListenV4(c UDPConn, ln *enode.LocalNode, cfg Config) (*UDPv4, error) {
closeCtx, cancel := context.WithCancel(context.Background())
t := &UDPv4{
conn: c,
priv: cfg.PrivateKey,
netrestrict: cfg.NetRestrict,
localNode: ln,
db: ln.Database(),
gotreply: make(chan reply),
addReplyMatcher: make(chan *replyMatcher),
closeCtx: closeCtx,
cancelCloseCtx: cancel,
log: cfg.Log,
}
if t.log == nil {
t.log = log.Root()
}
tab, err := newTable(t, ln.Database(), cfg.Bootnodes, t.log)
if err != nil {
return nil, err
}
t.tab = tab
go tab.loop()
t.wg.Add(2)
go t.loop()
go t.readLoop(cfg.Unhandled)
return t, nil
}
Dial Scheduler
Two goroutines will be started:
- readNodes: fetch node info from an iterator, driven by Discovery
- loop: handle messsage
Loop:
- nodesCh: new node discovered by Discovery
- doneCh: dialingTask done, remove from dialing task set
- addPeerCh: connection established, remove from static task set
- remPeerCh: disconnected, removed
- addStaticCh: add static node, new dialing task
- remStaticCh: remove static node
- historyExp: timer to remove item in d.history
Besides static node, dial scheduler only takes node input from Discovery, FairMix iterator.
case node := <-nodesCh:
if err := d.checkDial(node); err != nil {
d.log.Trace("Discarding dial candidate", "id", node.ID(), "ip", node.IP(), "reason", err)
} else {
d.startDial(newDialTask(node, dynDialedConn))
}
case task := <-d.doneCh:
id := task.dest.ID()
delete(d.dialing, id)
d.updateStaticPool(id)
d.doneSinceLastLog++
case c := <-d.addPeerCh:
if c.is(dynDialedConn) || c.is(staticDialedConn) {
d.dialPeers++
}
id := c.node.ID()
d.peers[id] = c.flags
// Remove from static pool because the node is now connected.
task := d.static[id]
if task != nil && task.staticPoolIndex >= 0 {
d.removeFromStaticPool(task.staticPoolIndex)
}
// TODO: cancel dials to connected peers
case c := <-d.remPeerCh:
if c.is(dynDialedConn) || c.is(staticDialedConn) {
d.dialPeers--
}
delete(d.peers, c.node.ID())
d.updateStaticPool(c.node.ID())
case node := <-d.addStaticCh:
id := node.ID()
_, exists := d.static[id]
d.log.Trace("Adding static node", "id", id, "ip", node.IP(), "added", !exists)
if exists {
continue loop
}
task := newDialTask(node, staticDialedConn)
d.static[id] = task
if d.checkDial(node) == nil {
d.addToStaticPool(task)
}
case node := <-d.remStaticCh:
id := node.ID()
task := d.static[id]
d.log.Trace("Removing static node", "id", id, "ok", task != nil)
if task != nil {
delete(d.static, id)
if task.staticPoolIndex >= 0 {
d.removeFromStaticPool(task.staticPoolIndex)
}
}
case <-historyExp:
d.expireHistory()
case <-d.ctx.Done():
it.Close()
break loop
RLPX
rlpx is the transport protocol used by actual (non-test) connections. It wraps the frame encoder with locks and read/write deadlines.
Interfaces implemented by rlpx:
- MsgReader
- MsgWriter
- MsgReadWriter
- transport
type rlpx struct {
fd net.Conn // underlying transport
rmu, wmu sync.Mutex
rw *rlpxFrameRW
}
To set up RLPX:
- doEncHandshake, exchange pubkey, and enc params
- newRLPXFrameRW, snappy or not
- doProtoHandshake, exchange handshakeMsg
RLPXFrameRW
RLPXFrameRW is actually handling the message framing and ciphering/hashing.
It uses AES-CTR & SHA3 for chiphering & hashing. Note that AES-CBC is used to encypt HMAC after hashing.
func newRLPXFrameRW(conn io.ReadWriter, s secrets) *rlpxFrameRW {
macc, err := aes.NewCipher(s.MAC)
if err != nil {
panic("invalid MAC secret: " + err.Error())
}
encc, err := aes.NewCipher(s.AES)
if err != nil {
panic("invalid AES secret: " + err.Error())
}
// we use an all-zeroes IV for AES because the key used
// for encryption is ephemeral.
iv := make([]byte, encc.BlockSize())
return &rlpxFrameRW{
conn: conn,
enc: cipher.NewCTR(encc, iv),
dec: cipher.NewCTR(encc, iv),
macCipher: macc,
egressMAC: s.EgressMAC,
ingressMAC: s.IngressMAC,
}
}
Snappy
Snappy compression. It is said that it has good balance between comppresion ratio and speed.
readMsg & writeMsg
Msg Format is like:
16B(Len+ZeroMsg) + 16B(headerMAC) + Variant(Msg) + 16B(MsgMAC)
Variant(Msg) = Encrypted(rlp.Encode(Msg.Code) + Msg.Payload)
Msg.Payload = SnappyEncoded(Msg.Payload)
func (rw *rlpxFrameRW) WriteMsg(msg Msg) error {
ptype, _ := rlp.EncodeToBytes(msg.Code)
// if snappy is enabled, compress message now
if rw.snappy {
if msg.Size > maxUint24 {
return errPlainMessageTooLarge
}
payload, _ := ioutil.ReadAll(msg.Payload)
payload = snappy.Encode(nil, payload)
msg.Payload = bytes.NewReader(payload)
msg.Size = uint32(len(payload))
}
msg.meterSize = msg.Size
if metrics.Enabled && msg.meterCap.Name != "" { // don't meter non-subprotocol messages
m := fmt.Sprintf("%s/%s/%d/%#02x", egressMeterName, msg.meterCap.Name, msg.meterCap.Version, msg.meterCode)
metrics.GetOrRegisterMeter(m, nil).Mark(int64(msg.meterSize))
}
// write header
headbuf := make([]byte, 32)
fsize := uint32(len(ptype)) + msg.Size
if fsize > maxUint24 {
return errors.New("message size overflows uint24")
}
putInt24(fsize, headbuf) // TODO: check overflow
copy(headbuf[3:], zeroHeader)
rw.enc.XORKeyStream(headbuf[:16], headbuf[:16]) // first half is now encrypted
// write header MAC
copy(headbuf[16:], updateMAC(rw.egressMAC, rw.macCipher, headbuf[:16]))
if _, err := rw.conn.Write(headbuf); err != nil {
return err
}
// write encrypted frame, updating the egress MAC hash with
// the data written to conn.
tee := cipher.StreamWriter{S: rw.enc, W: io.MultiWriter(rw.conn, rw.egressMAC)}
if _, err := tee.Write(ptype); err != nil {
return err
}
if _, err := io.Copy(tee, msg.Payload); err != nil {
return err
}
if padding := fsize % 16; padding > 0 {
if _, err := tee.Write(zero16[:16-padding]); err != nil {
return err
}
}
// write frame MAC. egress MAC hash is up to date because
// frame content was written to it as well.
fmacseed := rw.egressMAC.Sum(nil)
mac := updateMAC(rw.egressMAC, rw.macCipher, fmacseed)
_, err := rw.conn.Write(mac)
return err
}
func (rw *rlpxFrameRW) ReadMsg() (msg Msg, err error) {
// read the header
headbuf := make([]byte, 32)
if _, err := io.ReadFull(rw.conn, headbuf); err != nil {
return msg, err
}
// verify header mac
shouldMAC := updateMAC(rw.ingressMAC, rw.macCipher, headbuf[:16])
if !hmac.Equal(shouldMAC, headbuf[16:]) {
return msg, errors.New("bad header MAC")
}
rw.dec.XORKeyStream(headbuf[:16], headbuf[:16]) // first half is now decrypted
fsize := readInt24(headbuf)
// ignore protocol type for now
// read the frame content
var rsize = fsize // frame size rounded up to 16 byte boundary
if padding := fsize % 16; padding > 0 {
rsize += 16 - padding
}
framebuf := make([]byte, rsize)
if _, err := io.ReadFull(rw.conn, framebuf); err != nil {
return msg, err
}
// read and validate frame MAC. we can re-use headbuf for that.
rw.ingressMAC.Write(framebuf)
fmacseed := rw.ingressMAC.Sum(nil)
if _, err := io.ReadFull(rw.conn, headbuf[:16]); err != nil {
return msg, err
}
shouldMAC = updateMAC(rw.ingressMAC, rw.macCipher, fmacseed)
if !hmac.Equal(shouldMAC, headbuf[:16]) {
return msg, errors.New("bad frame MAC")
}
// decrypt frame content
rw.dec.XORKeyStream(framebuf, framebuf)
// decode message code
content := bytes.NewReader(framebuf[:fsize])
if err := rlp.Decode(content, &msg.Code); err != nil {
return msg, err
}
msg.Size = uint32(content.Len())
msg.meterSize = msg.Size
msg.Payload = content
// if snappy is enabled, verify and decompress message
if rw.snappy {
payload, err := ioutil.ReadAll(msg.Payload)
if err != nil {
return msg, err
}
size, err := snappy.DecodedLen(payload)
if err != nil {
return msg, err
}
if size > int(maxUint24) {
return msg, errPlainMessageTooLarge
}
payload, err = snappy.Decode(nil, payload)
if err != nil {
return msg, err
}
msg.Size, msg.Payload = uint32(size), bytes.NewReader(payload)
}
return msg, nil
}