diff --git a/.gitignore b/.gitignore index 8589039df..22bc8f008 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,6 @@ data *.sqlite-shm *.sqlite-wal **aggkit-001-data** -.vscode \ No newline at end of file +.vscode +debug +bin \ No newline at end of file diff --git a/aggsender/statuschecker/cert_status_checker.go b/aggsender/statuschecker/cert_status_checker.go index 972b83cb6..ff117d3ed 100644 --- a/aggsender/statuschecker/cert_status_checker.go +++ b/aggsender/statuschecker/cert_status_checker.go @@ -246,6 +246,9 @@ func (c *certStatusChecker) checkLastCertificateFromAgglayer(ctx context.Context func (c *certStatusChecker) executeInitialStatusAction(ctx context.Context, action *initialStatusResult, localCert *types.CertificateHeader, logFn types.EmitLogFunc) error { + if action.warning != "" { + c.log.Warnf("recovery: %s", action.warning) + } logFn("recovery: action: %s", action.String()) switch action.action { case InitialStatusActionNone: @@ -277,6 +280,10 @@ func (c *certStatusChecker) executeInitialStatusAction(ctx context.Context, if _, err := c.updateLocalStorageWithSettledAggLayerCert(ctx, action.cert); err != nil { return fmt.Errorf("recovery: error new local storage with agglayer certificate: %w", err) } + case InitialStatusActionDeleteLocalCert: + if err := c.storage.DeleteCertificate(nil, action.height, db.MaybeDelete); err != nil { + return fmt.Errorf("recovery: error deleting stale local certificate at height %d: %w", action.height, err) + } default: c.log.Warnf("recovery: error unknown action: %s", action.String()) return fmt.Errorf("recovery: unknown action: %s", action.action) diff --git a/aggsender/statuschecker/cert_status_checker_test.go b/aggsender/statuschecker/cert_status_checker_test.go index 24f4f5182..a42aeb23b 100644 --- a/aggsender/statuschecker/cert_status_checker_test.go +++ b/aggsender/statuschecker/cert_status_checker_test.go @@ -456,6 +456,16 @@ func TestExecuteInitialStatusAction(t *testing.T) { }, expectedError: "Waiting for 
it to be settled", }, + { + name: "Action DeleteLocalCert - success", + action: &initialStatusResult{ + action: InitialStatusActionDeleteLocalCert, + height: 7, + }, + mockFn: func(mockStorage *mocks.AggSenderStorage, mockCertQuerier *mocks.CertificateQuerier) { + mockStorage.EXPECT().DeleteCertificate(nil, uint64(7), db.MaybeDelete).Return(nil) + }, + }, { name: "Unknown Action", action: &initialStatusResult{ diff --git a/aggsender/statuschecker/initial_state.go b/aggsender/statuschecker/initial_state.go index 697febdb0..db07cf2c7 100644 --- a/aggsender/statuschecker/initial_state.go +++ b/aggsender/statuschecker/initial_state.go @@ -16,6 +16,9 @@ const ( InitialStatusActionNone initialStatusAction = iota InitialStatusActionUpdateCurrentCert InitialStatusActionInsertNewCert + InitialStatusActionDeleteLocalCert + + conflictingStatusResultsCapacity = 2 ) var ( @@ -38,13 +41,15 @@ type initialStatusAction int // String representation of the enum func (i initialStatusAction) String() string { - return [...]string{"None", "Update", "InsertNew"}[i] + return [...]string{"None", "Update", "InsertNew", "DeleteLocal"}[i] } type initialStatusResult struct { action initialStatusAction message string cert *agglayertypes.CertificateHeader + height uint64 + warning string } func newInitialStatusResult( @@ -58,6 +63,19 @@ func newInitialStatusResult( } } +func newInitialStatusDeleteResult(height uint64, message string) *initialStatusResult { + return &initialStatusResult{ + action: InitialStatusActionDeleteLocalCert, + message: message, + height: height, + } +} + +func (i *initialStatusResult) withWarning(warning string) *initialStatusResult { + i.warning = warning + return i +} + func (i *initialStatusResult) String() string { if i == nil { return types.NilStr @@ -69,6 +87,12 @@ func (i *initialStatusResult) String() string { } else { res += ", Cert: " + types.NilStr } + if i.action == InitialStatusActionDeleteLocalCert { + res += fmt.Sprintf(", Height: %d", i.height) + } + if 
i.warning != "" { + res += fmt.Sprintf(", Warning: %s", i.warning) + } return res } @@ -128,36 +152,32 @@ func (i *initialStatus) process() ([]*initialStatusResult, error) { results := make([]*initialStatusResult, 0, initialStatusResultsCapacity) - pendingCertAction, err := i.processLastLocalCert() + pendingCertActions, err := i.processLastLocalCert() if err != nil { return nil, fmt.Errorf("recovery: failed processing pending certificate: %w", err) } - if pendingCertAction != nil { - results = append(results, pendingCertAction) + if pendingCertActions != nil { + results = append(results, pendingCertActions...) } - settledCertAction, err := i.processLastSettledCert() - if err != nil { - return nil, fmt.Errorf("recovery: failed processing settled certificate: %w", err) - } - - if settledCertAction != nil { - results = append(results, settledCertAction) + settledCertActions := i.processLastSettledCert() + if settledCertActions != nil { + results = append(results, settledCertActions...) } return results, nil } // processLastLocalCert checks the last certificates from agglayer vs local certificates and returns the action to take -func (i *initialStatus) processLastLocalCert() (*initialStatusResult, error) { +func (i *initialStatus) processLastLocalCert() ([]*initialStatusResult, error) { if i.LocalLastCert == nil && i.AgglayerLastSettledCert == nil && i.AgglayerLastPendingCert != nil { if i.AgglayerLastPendingCert.Height == 0 { - return newInitialStatusResult( + return []*initialStatusResult{newInitialStatusResult( InitialStatusActionInsertNewCert, "no settled cert yet, and the pending cert have the correct height (0) so we use it", i.AgglayerLastPendingCert, - ), nil + )}, nil } // We don't known if pendingCert is going to be Settled or InError. 
@@ -167,11 +187,11 @@ func (i *initialStatus) processLastLocalCert() (*initialStatusResult, error) { i.AgglayerLastPendingCert.ID(), i.AgglayerLastPendingCert.StatusString()) } if i.AgglayerLastPendingCert.Status.IsInError() && i.AgglayerLastPendingCert.Height > 0 { - return newInitialStatusResult( + return []*initialStatusResult{newInitialStatusResult( InitialStatusActionNone, "the pending cert have wrong height and it's InError. We ignore it", nil, - ), nil + )}, nil } } aggLayerLastCert := i.getLatestAggLayerCert() @@ -179,56 +199,91 @@ func (i *initialStatus) processLastLocalCert() (*initialStatusResult, error) { // CASE 1: No certificates in local storage and agglayer if localLastCert == nil && aggLayerLastCert == nil { - return newInitialStatusResult( + return []*initialStatusResult{newInitialStatusResult( InitialStatusActionNone, "no certificates in local storage and agglayer: initial state", nil, - ), nil + )}, nil } // CASE 2: No certificates in local storage but agglayer has one if localLastCert == nil && aggLayerLastCert != nil { - return newInitialStatusResult( + return []*initialStatusResult{newInitialStatusResult( InitialStatusActionInsertNewCert, "no certificates in local storage but agglayer have one (no InError)", aggLayerLastCert, - ), nil + )}, nil } - // CASE 2.1: certificate in storage but not in agglayer - // this is a non-sense, so throw an error + // CASE 2.1: certificate in storage but not in agglayer. + // AggLayer is the source of truth, so drop the stale local cert and continue. if localLastCert != nil && aggLayerLastCert == nil { - return nil, fmt.Errorf("recovery: certificate exists in storage but not in agglayer. 
Inconsistency") + return []*initialStatusResult{ + newInitialStatusDeleteResult( + localLastCert.Height, + fmt.Sprintf( + "agglayer has no certificate for local height %d, deleting stale local certificate", + localLastCert.Height, + ), + ).withWarning(fmt.Sprintf( + "local latest certificate %s is not present in agglayer; agglayer is the source of truth", + localLastCert.ID(), + )), + }, nil } - // CASE 3.1: the certificate on the agglayer has less height than the one stored in the local storage + // CASE 3.1: the certificate on the agglayer has less height than the one stored in the local storage. + // Delete the stale local tip and let the next recovery iteration continue reconciling if needed. if aggLayerLastCert.Height < localLastCert.Height { - return nil, fmt.Errorf("recovery: the last certificate in the agglayer has less height (%d) "+ - "than the one in the local storage (%d)", aggLayerLastCert.Height, localLastCert.Height) + return []*initialStatusResult{ + newInitialStatusDeleteResult( + localLastCert.Height, + fmt.Sprintf("agglayer latest certificate is height %d, deleting stale local height %d", + aggLayerLastCert.Height, localLastCert.Height), + ).withWarning(fmt.Sprintf( + "local latest certificate %s is ahead of agglayer certificate %s; agglayer is the source of truth", + localLastCert.ID(), aggLayerLastCert.ID(), + )), + }, nil } // CASE 3.2: aggsender stopped between sending to agglayer and storing to the local storage if aggLayerLastCert.Height == localLastCert.Height+1 { // we need to store the certificate in the local storage. 
- return newInitialStatusResult( + return []*initialStatusResult{newInitialStatusResult( InitialStatusActionInsertNewCert, fmt.Sprintf("agglayer have next cert, storing cert: %s", aggLayerLastCert.ID()), aggLayerLastCert, - ), nil + )}, nil } - // CASE 4: AggSender and AggLayer are not on the same page - // note: we don't need to check individual fields of the certificate - // because CertificateID is a hash of all the fields + // CASE 4: AggSender and AggLayer are not on the same page. + // AggLayer is authoritative, so reconcile local state to the AggLayer view. if localLastCert.CertificateID != aggLayerLastCert.CertificateID { - return nil, fmt.Errorf("recovery: Local certificate:\n %s \n is different from agglayer certificate:\n %s", - localLastCert.String(), aggLayerLastCert.String()) + results := make([]*initialStatusResult, 0, conflictingStatusResultsCapacity) + warning := fmt.Sprintf( + "local latest certificate %s does not match agglayer certificate %s at height %d; agglayer is the source of truth", + localLastCert.ID(), aggLayerLastCert.ID(), aggLayerLastCert.Height, + ) + if localLastCert.Height == aggLayerLastCert.Height { + results = append(results, newInitialStatusDeleteResult( + localLastCert.Height, + fmt.Sprintf("replacing conflicting local certificate at height %d with agglayer certificate %s", + localLastCert.Height, aggLayerLastCert.ID()), + ).withWarning(warning)) + } + results = append(results, newInitialStatusResult( + InitialStatusActionInsertNewCert, + fmt.Sprintf("syncing local certificate state to agglayer certificate %s", aggLayerLastCert.ID()), + aggLayerLastCert, + ).withWarning(warning)) + return results, nil } // CASE 5: AggSender and AggLayer are at same page // just update status - return newInitialStatusResult( + return []*initialStatusResult{newInitialStatusResult( InitialStatusActionUpdateCurrentCert, fmt.Sprintf("aggsender same cert, updating state: %s", aggLayerLastCert.ID()), aggLayerLastCert, - ), nil + )}, nil } func (i 
*initialStatus) checkAgglayerConsistenceCerts() error { @@ -273,73 +328,98 @@ func (i *initialStatus) getLatestAggLayerCert() *agglayertypes.CertificateHeader } // processLastSettledCert checks the last settled certificate from agglayer vs local storage -func (i *initialStatus) processLastSettledCert() (*initialStatusResult, error) { +func (i *initialStatus) processLastSettledCert() []*initialStatusResult { if i.AgglayerLastPendingCert == nil { // if pending cert is nil, this will be processed in the processLastLocal function - return nil, nil + return nil } if i.AgglayerLastSettledCert == nil { // CASE 1: Local storage have settled certificate, but agglayer doesn't have one - // This is an invalid situation + // AggLayer is authoritative, so delete the stale local settled cert. if i.LocalLastSettledCert != nil { - return nil, fmt.Errorf("recovery: local settled certificate exists (%s)"+ - "but agglayer has no settled certificate", i.LocalLastSettledCert.ID()) + return []*initialStatusResult{ + newInitialStatusDeleteResult( + i.LocalLastSettledCert.Height, + fmt.Sprintf("agglayer has no settled certificate, deleting stale local settled certificate at height %d", + i.LocalLastSettledCert.Height), + ).withWarning(fmt.Sprintf( + "local settled certificate %s is not present in agglayer; agglayer is the source of truth", + i.LocalLastSettledCert.ID(), + )), + } } // CASE 2: Both local and agglayer have no settled certificate - return newInitialStatusResult( + return []*initialStatusResult{newInitialStatusResult( InitialStatusActionNone, "agglayer and local storage have no settled certificate", i.AgglayerLastSettledCert, - ), nil + )} } if i.LocalLastSettledCert == nil { // CASE 3: We have no settled certificate in local storage - return newInitialStatusResult( + return []*initialStatusResult{newInitialStatusResult( InitialStatusActionInsertNewCert, "no local settled certificate,inserting agglayer settled certificate into local storage", i.AgglayerLastSettledCert, - ), 
nil + )} } // CASE 4: We have a settled certificate in local storage // but its height is higher than the one in the agglayer if i.LocalLastSettledCert.Height > i.AgglayerLastSettledCert.Height { - return nil, fmt.Errorf("recovery: local settled certificate (%s) has higher height (%d) "+ - "than agglayer settled certificate (%s) with height (%d)", - i.LocalLastSettledCert.ID(), i.LocalLastSettledCert.Height, - i.AgglayerLastSettledCert.ID(), i.AgglayerLastSettledCert.Height) + return []*initialStatusResult{ + newInitialStatusDeleteResult( + i.LocalLastSettledCert.Height, + fmt.Sprintf("agglayer settled height is %d, deleting stale local settled certificate at height %d", + i.AgglayerLastSettledCert.Height, i.LocalLastSettledCert.Height), + ).withWarning(fmt.Sprintf( + "local settled certificate %s is ahead of agglayer settled certificate %s; agglayer is the source of truth", + i.LocalLastSettledCert.ID(), i.AgglayerLastSettledCert.ID(), + )), + } } // CASE 5: We have a settled certificate in local storage with same height if i.LocalLastSettledCert.Height == i.AgglayerLastSettledCert.Height { // CASE 5.1: We have a settled certificate in local storage // the height is the same but the certificate ID is different - // this is a problem, because it means that the local storage has a different certificate - // than the one in the agglayer for the same height + // than the one in the agglayer for the same height. Replace it with the agglayer value. 
if i.LocalLastSettledCert.CertificateID != i.AgglayerLastSettledCert.CertificateID { - return nil, fmt.Errorf("recovery: local settled certificate (%s) has same height (%d) "+ - "but different certificate ID (%s) than agglayer settled certificate (%s)", - i.LocalLastSettledCert.ID(), i.LocalLastSettledCert.Height, - i.LocalLastSettledCert.CertificateID, - i.AgglayerLastSettledCert.ID()) + warning := fmt.Sprintf( + "local settled certificate %s does not match agglayer settled certificate %s at height %d; "+ + "agglayer is the source of truth", + i.LocalLastSettledCert.ID(), i.AgglayerLastSettledCert.ID(), i.AgglayerLastSettledCert.Height, + ) + return []*initialStatusResult{ + newInitialStatusDeleteResult( + i.LocalLastSettledCert.Height, + fmt.Sprintf("replacing conflicting local settled certificate at height %d with agglayer settled certificate %s", + i.LocalLastSettledCert.Height, i.AgglayerLastSettledCert.ID()), + ).withWarning(warning), + newInitialStatusResult( + InitialStatusActionInsertNewCert, + "updating local storage with agglayer settled certificate after deleting conflicting local entry", + i.AgglayerLastSettledCert, + ).withWarning(warning), + } } // CASE 5.2: the local settled certificate matches the agglayer settled certificate - return newInitialStatusResult( + return []*initialStatusResult{newInitialStatusResult( InitialStatusActionNone, "last settled certificate already in local storage with same height and ID", i.AgglayerLastSettledCert, - ), nil + )} } // CASE 6: We have a settled certificate in local storage that is lower than the one in the agglayer // this means that we need to update the local storage with the agglayer settled - return newInitialStatusResult( + return []*initialStatusResult{newInitialStatusResult( InitialStatusActionInsertNewCert, "updating local storage with agglayer settled certificate", i.AgglayerLastSettledCert, - ), nil + )} } diff --git a/aggsender/statuschecker/initial_state_test.go 
b/aggsender/statuschecker/initial_state_test.go index eb7671545..0cad08149 100644 --- a/aggsender/statuschecker/initial_state_test.go +++ b/aggsender/statuschecker/initial_state_test.go @@ -20,6 +20,7 @@ type initialStateResultTest struct { action initialStatusAction subMsg string cert *certTestData + height uint64 } type testCaseData struct { @@ -79,21 +80,27 @@ func TestInitialStateInconsistence(t *testing.T) { localCert: &certTestData{hash1, 2, agglayertypes.InError}, agglayerSettled: nil, agglayerPending: nil, - resultError: true, + resultActions: []*initialStateResultTest{ + {InitialStatusActionDeleteLocalCert, "", nil, 2}, + }, }, { name: "5|ID1, h1 , Settled | nil | nil | AggSender incosistence", localCert: &certTestData{hash1, 2, agglayertypes.Settled}, agglayerSettled: nil, agglayerPending: nil, - resultError: true, + resultActions: []*initialStateResultTest{ + {InitialStatusActionDeleteLocalCert, "", nil, 2}, + }, }, { name: "6|ID1, h1 , !=closed | nil | nil | incosistence", localCert: &certTestData{hash1, 0, agglayertypes.Proven}, agglayerSettled: nil, agglayerPending: nil, - resultError: true, + resultActions: []*initialStateResultTest{ + {InitialStatusActionDeleteLocalCert, "", nil, 0}, + }, }, { name: "7|ID1, h3 , NA | NA | ID2, h2 , !=InError | AggSender incosistence", @@ -107,21 +114,29 @@ func TestInitialStateInconsistence(t *testing.T) { localCert: &certTestData{hash1, 3, agglayertypes.Proven}, agglayerSettled: &certTestData{hash2, 2, agglayertypes.Proven}, agglayerPending: nil, - resultError: true, + resultActions: []*initialStateResultTest{ + {InitialStatusActionDeleteLocalCert, "", nil, 3}, + }, }, { name: "9|ID2, h2 , NA | ID1, h3 , N/A | ID3, h4 , !=inError | AggSender incosistence (2cert jump)", localCert: &certTestData{hash1, 2, agglayertypes.Proven}, agglayerSettled: &certTestData{hash2, 3, agglayertypes.Settled}, agglayerPending: &certTestData{hash2, 4, agglayertypes.Proven}, - resultError: true, + resultActions: 
[]*initialStateResultTest{ + {InitialStatusActionInsertNewCert, "", &certTestData{hash2, 4, agglayertypes.Proven}, 0}, + {InitialStatusActionInsertNewCert, "", &certTestData{hash2, 3, agglayertypes.Settled}, 0}, + }, }, { name: "10|ID2, h2 , NA | ID1, h3 , N/A | ID3, h4 , inError | AggSender incosistence (2cert jump)", localCert: &certTestData{hash1, 2, agglayertypes.Proven}, agglayerSettled: &certTestData{hash2, 3, agglayertypes.Settled}, agglayerPending: &certTestData{hash2, 4, agglayertypes.InError}, - resultError: true, + resultActions: []*initialStateResultTest{ + {InitialStatusActionInsertNewCert, "", &certTestData{hash2, 4, agglayertypes.InError}, 0}, + {InitialStatusActionInsertNewCert, "", &certTestData{hash2, 3, agglayertypes.Settled}, 0}, + }, }, } runTestCases(t, tests) @@ -161,7 +176,7 @@ func TestRegularCases(t *testing.T) { agglayerSettled: nil, agglayerPending: nil, resultActions: []*initialStateResultTest{ - {InitialStatusActionNone, "", nil}, // for pending cert + {InitialStatusActionNone, "", nil, 0}, // for pending cert }, }, { @@ -170,8 +185,8 @@ func TestRegularCases(t *testing.T) { agglayerSettled: nil, agglayerPending: &certTestData{hash1, 0, agglayertypes.InError}, resultActions: []*initialStateResultTest{ - {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 0, agglayertypes.InError}}, // for pending cert - {InitialStatusActionNone, "", nil}, // for settled cert + {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 0, agglayertypes.InError}, 0}, // for pending cert + {InitialStatusActionNone, "", nil, 0}, // for settled cert }, }, { @@ -180,8 +195,8 @@ func TestRegularCases(t *testing.T) { agglayerSettled: nil, agglayerPending: &certTestData{hash1, 1, agglayertypes.InError}, resultActions: []*initialStateResultTest{ - {InitialStatusActionNone, "", nil}, // for pending cert - {InitialStatusActionNone, "", nil}, // for settled cert + {InitialStatusActionNone, "", nil, 0}, // for pending cert + {InitialStatusActionNone, "", 
nil, 0}, // for settled cert }, }, { @@ -190,8 +205,8 @@ func TestRegularCases(t *testing.T) { agglayerSettled: nil, agglayerPending: &certTestData{hash1, 0, agglayertypes.Proven}, resultActions: []*initialStateResultTest{ - {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 0, agglayertypes.Proven}}, // for pending cert - {InitialStatusActionNone, "", nil}, // for settled cert + {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 0, agglayertypes.Proven}, 0}, // for pending cert + {InitialStatusActionNone, "", nil, 0}, // for settled cert }, }, { @@ -207,7 +222,7 @@ func TestRegularCases(t *testing.T) { agglayerSettled: &certTestData{hash1, 1, agglayertypes.Proven}, agglayerPending: nil, resultActions: []*initialStateResultTest{ - {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 1, agglayertypes.Proven}}, + {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 1, agglayertypes.Proven}, 0}, }, }, { @@ -216,8 +231,8 @@ func TestRegularCases(t *testing.T) { agglayerSettled: &certTestData{hash1, 1, agglayertypes.Proven}, agglayerPending: &certTestData{hash2, 2, agglayertypes.InError}, resultActions: []*initialStateResultTest{ - {InitialStatusActionInsertNewCert, "", &certTestData{hash2, 2, agglayertypes.InError}}, - {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 1, agglayertypes.Proven}}, + {InitialStatusActionInsertNewCert, "", &certTestData{hash2, 2, agglayertypes.InError}, 0}, + {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 1, agglayertypes.Proven}, 0}, }, }, { @@ -226,8 +241,8 @@ func TestRegularCases(t *testing.T) { agglayerSettled: &certTestData{hash1, 1, agglayertypes.Settled}, agglayerPending: &certTestData{hash2, 2, agglayertypes.Pending}, resultActions: []*initialStateResultTest{ - {InitialStatusActionInsertNewCert, "", &certTestData{hash2, 2, agglayertypes.Pending}}, - {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 1, agglayertypes.Settled}}, + {InitialStatusActionInsertNewCert, "", 
&certTestData{hash2, 2, agglayertypes.Pending}, 0}, + {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 1, agglayertypes.Settled}, 0}, }, }, { @@ -236,8 +251,8 @@ func TestRegularCases(t *testing.T) { agglayerSettled: nil, agglayerPending: &certTestData{hash1, 1, agglayertypes.InError}, resultActions: []*initialStateResultTest{ - {InitialStatusActionUpdateCurrentCert, "", &certTestData{hash1, 1, agglayertypes.InError}}, // for pending cert - {InitialStatusActionNone, "", nil}, // for settled cert + {InitialStatusActionUpdateCurrentCert, "", &certTestData{hash1, 1, agglayertypes.InError}, 0}, // for pending cert + {InitialStatusActionNone, "", nil, 0}, // for settled cert }, }, { @@ -246,8 +261,8 @@ func TestRegularCases(t *testing.T) { agglayerSettled: &certTestData{hash1, 1, agglayertypes.Settled}, agglayerPending: &certTestData{hash2, 2, agglayertypes.InError}, resultActions: []*initialStateResultTest{ - {InitialStatusActionUpdateCurrentCert, "", &certTestData{hash2, 2, agglayertypes.InError}}, - {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 1, agglayertypes.Settled}}, + {InitialStatusActionUpdateCurrentCert, "", &certTestData{hash2, 2, agglayertypes.InError}, 0}, + {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 1, agglayertypes.Settled}, 0}, }, }, { @@ -255,7 +270,7 @@ func TestRegularCases(t *testing.T) { localCert: &certTestData{hash2, 2, agglayertypes.Proven}, agglayerSettled: &certTestData{hash1, 3, agglayertypes.Proven}, agglayerPending: nil, - resultActions: []*initialStateResultTest{{InitialStatusActionInsertNewCert, "", &certTestData{hash1, 3, agglayertypes.Proven}}}, + resultActions: []*initialStateResultTest{{InitialStatusActionInsertNewCert, "", &certTestData{hash1, 3, agglayertypes.Proven}, 0}}, }, { name: "12|ID2, h2 , NA | ID1, h2 , settled | ID1, h3 , !=inError | store(PENDING)&store(SETTLED)", @@ -263,8 +278,8 @@ func TestRegularCases(t *testing.T) { agglayerSettled: &certTestData{hash1, 2, 
agglayertypes.Settled}, agglayerPending: &certTestData{hash1, 3, agglayertypes.Proven}, resultActions: []*initialStateResultTest{ - {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 3, agglayertypes.Proven}}, - {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 2, agglayertypes.Settled}}, + {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 3, agglayertypes.Proven}, 0}, + {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 2, agglayertypes.Settled}, 0}, }, }, { @@ -274,8 +289,8 @@ func TestRegularCases(t *testing.T) { agglayerSettled: &certTestData{hash1, 2, agglayertypes.Settled}, agglayerPending: &certTestData{hash1, 3, agglayertypes.InError}, resultActions: []*initialStateResultTest{ - {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 3, agglayertypes.InError}}, - {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 2, agglayertypes.Settled}}, + {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 3, agglayertypes.InError}, 0}, + {InitialStatusActionInsertNewCert, "", &certTestData{hash1, 2, agglayertypes.Settled}, 0}, }, }, { @@ -284,7 +299,10 @@ func TestRegularCases(t *testing.T) { localSettledCert: &certTestData{hash1, 2, agglayertypes.Settled}, agglayerSettled: &certTestData{hash2, 1, agglayertypes.Settled}, agglayerPending: &certTestData{hash1, 3, agglayertypes.Pending}, - resultError: true, + resultActions: []*initialStateResultTest{ + {InitialStatusActionUpdateCurrentCert, "", &certTestData{hash1, 3, agglayertypes.Pending}, 0}, + {InitialStatusActionDeleteLocalCert, "", nil, 2}, + }, }, { name: "15| LocalCert: ID3, h1, pending | LocalSettled: ID2, h1 | AgglayerSettled: ID2, h1 | AgglayerPending: ID3, h1 | store(PENDING) & none(SETTLED)", @@ -293,8 +311,8 @@ func TestRegularCases(t *testing.T) { agglayerSettled: &certTestData{hash2, 2, agglayertypes.Settled}, agglayerPending: &certTestData{hash1, 3, agglayertypes.Pending}, resultActions: []*initialStateResultTest{ - 
{InitialStatusActionUpdateCurrentCert, "", &certTestData{hash1, 3, agglayertypes.Pending}}, // for pending cert - {InitialStatusActionNone, "", nil}, // for settled cert + {InitialStatusActionUpdateCurrentCert, "", &certTestData{hash1, 3, agglayertypes.Pending}, 0}, // for pending cert + {InitialStatusActionNone, "", nil, 0}, // for settled cert }, }, { @@ -303,7 +321,11 @@ func TestRegularCases(t *testing.T) { localSettledCert: &certTestData{hash1, 2, agglayertypes.Settled}, agglayerSettled: &certTestData{hash2, 2, agglayertypes.Settled}, agglayerPending: &certTestData{hash1, 3, agglayertypes.Pending}, - resultError: true, + resultActions: []*initialStateResultTest{ + {InitialStatusActionUpdateCurrentCert, "", &certTestData{hash1, 3, agglayertypes.Pending}, 0}, + {InitialStatusActionDeleteLocalCert, "", nil, 2}, + {InitialStatusActionInsertNewCert, "", &certTestData{hash2, 2, agglayertypes.Settled}, 0}, + }, }, { name: "17| LocalCert: ID3, h1, pending | LocalSettled: ID1, h2 | AgglayerSettled: ID2, h3 | AgglayerPending: ID3, h1 | store(PENDING) & store(SETTLED)", @@ -312,8 +334,8 @@ func TestRegularCases(t *testing.T) { agglayerSettled: &certTestData{hash3, 2, agglayertypes.Settled}, agglayerPending: &certTestData{hash1, 3, agglayertypes.Pending}, resultActions: []*initialStateResultTest{ - {InitialStatusActionUpdateCurrentCert, "", &certTestData{hash1, 3, agglayertypes.Pending}}, // for pending cert - {InitialStatusActionInsertNewCert, "", &certTestData{hash3, 2, agglayertypes.Settled}}, // for settled cert + {InitialStatusActionUpdateCurrentCert, "", &certTestData{hash1, 3, agglayertypes.Pending}, 0}, // for pending cert + {InitialStatusActionInsertNewCert, "", &certTestData{hash3, 2, agglayertypes.Settled}, 0}, // for settled cert }, }, } @@ -368,6 +390,7 @@ func runTestCases(t *testing.T, tests []testCaseData) { fmt.Print("result:", action.String()) require.Equal(t, resultAction.action, action.action) require.Contains(t, action.message, resultAction.subMsg) + 
require.Equal(t, resultAction.height, action.height) if resultAction.cert != nil { require.NotNil(t, action.cert) require.Equal(t, resultAction.cert.CertificateID, action.cert.CertificateID) diff --git a/docs/backward_forward_let_runbook.md b/docs/backward_forward_let_runbook.md index 62299c834..cfb193fd0 100644 --- a/docs/backward_forward_let_runbook.md +++ b/docs/backward_forward_let_runbook.md @@ -1,984 +1,213 @@ -# Backward and Forward LET runbook +# Backward/Forward LET runbook ## Introduction -The **Local Exit Tree (LET)** is a Merkle tree maintained on L2 that tracks all bridge deposits originating from a given chain. Every time a bridge operation occurs on L2, a new leaf is appended to the LET. Periodically, the `aggsender` component bundles these leaves into a certificate and sends it to the AggLayer, which settles the resulting **Local Exit Root (LER)** on L1. +The Local Exit Tree (LET) on L2 must stay consistent with the Local Exit Root (LER) +settled on L1 through the AggLayer. When they diverge, future certificates can be +rejected until the L2 bridge is reconciled. -Under normal operation, the LET on L2 and the LER settled on L1 stay in sync. However, certain failure scenarios can cause them to **diverge**: L1 has a settled LER that does not match the actual state of the LET on L2. When this happens, the L2 network must reconcile its LET to match what was settled on L1, otherwise future certificates will be rejected by the AggLayer because the LER will not match. +Use `backward-forward-let` for this workflow. 
The tool already: -To handle these cases, two admin smart contract functions are provided on the [`AgglayerBridgeL2`](https://agglayer.github.io/protocol-team-docs/smart-contracts/v12/AgglayerBridgeL2/) contract: +- reads the settled AggLayer state, +- reads the current L2 bridge state, +- queries aggsender for certificate bridge exits, +- finds the divergence point, +- classifies the recovery case, +- prints the recovery plan, +- activates emergency mode when needed, +- executes `BackwardLET` and/or `ForwardLET`, +- verifies deposit count and LER after each step, +- deactivates emergency mode at the end. -- **[`backwardLET`](https://agglayer.github.io/protocol-team-docs/smart-contracts/v12/AgglayerBridgeL2/#13-backwardlet)**: Rolls the LET backward to a previous state with fewer deposits. This is used to remove leaves that were added on L2 but do not match what was settled on L1. ([source](https://github.com/agglayer/agglayer-contracts/blob/v12.2.0/contracts/sovereignChains/AgglayerBridgeL2.sol#L732)) -- **[`forwardLET`](https://agglayer.github.io/protocol-team-docs/smart-contracts/v12/AgglayerBridgeL2/#14-forwardlet)**: Advances the LET by adding one or more leaves in a single transaction. This is used to insert leaves that were settled on L1 but are missing from the L2 tree. ([source](https://github.com/agglayer/agglayer-contracts/blob/v12.2.0/contracts/sovereignChains/AgglayerBridgeL2.sol#L797)) +This runbook documents the operator flow. It intentionally avoids manual diagnosis steps +that are already implemented in the tool. -Both functions can **only** be called while the `AgglayerBridgeL2` contract is in **emergency mode**, and only by an account holding the `GlobalExitRootRemover` role. +## When to run this -## Prerequisites - -Before starting, ensure you have these environment variables set. 
They are referenced throughout the runbook: - -```bash -# ── Network RPC endpoints ── -export L2_RPC_URL="" - -# ── Contract addresses (L2) ── -export BRIDGE_L2_ADDR="" -export GER_L2_ADDR="" - -# ── AggLayer endpoints ── -export AGGLAYER_GRPC="" - -# ── Bridge service endpoint ── -export BRIDGE_SERVICE_URL="" # e.g. http://localhost:8080/bridge/v1 - -# ── Network ID of the affected L2 chain ── -export NETWORK_ID="" - -# ── Private key of the account holding the GlobalExitRootRemover role ── -# This same account is used for backwardLET and forwardLET calls. -# For activateEmergencyState/deactivateEmergencyState, the emergencyBridgePauser -# and emergencyBridgeUnpauser keys are needed respectively (may be different accounts). -export GER_REMOVER_PK="" -export EMERGENCY_PAUSER_PK="" -export EMERGENCY_UNPAUSER_PK="" -``` - -### Verify role addresses - -Before proceeding, confirm which accounts hold each role: - -```bash -# Who can call backwardLET / forwardLET (GlobalExitRootRemover)? -cast call $GER_L2_ADDR "globalExitRootRemover()(address)" --rpc-url $L2_RPC_URL - -# Who can activate emergency state? -cast call $BRIDGE_L2_ADDR "emergencyBridgePauser()(address)" --rpc-url $L2_RPC_URL - -# Who can deactivate emergency state? -cast call $BRIDGE_L2_ADDR "emergencyBridgeUnpauser()(address)" --rpc-url $L2_RPC_URL -``` - -## Detection - -A backward/forward LET operation is needed when the LER settled on L1 diverges from the LET state on L2. This can be detected through the following indicators: - -### 1. Certificate rejected by the AggLayer - -The `aggsender` submits a certificate to the AggLayer, which rejects it because the `PrevLocalExitRoot` in the certificate does not match the last settled LER on L1. This is the most common first signal of divergence. - -The certificate transitions to `InError` status on the AggLayer side. 
The `aggsender` detects this via its periodic status checker and logs: - -| File | Line | Level | Message | -|------|------|-------|---------| -| `aggsender/statuschecker/cert_status_checker.go` | 187 | `INFO` | `certificate changed status from [] to [InError] elapsed time: full_cert (agglayer): ` | -| `aggsender/statuschecker/cert_status_checker.go` | 169 | `INFO` | `found InError certificate(s) with no pending certs, enabling retry` | -| `aggsender/aggsender.go` | 332 | `INFO` | `An InError cert exists. Sending a new one ()` | -| `aggsender/aggsender.go` | 365 | `ERROR` | `Certificate send trigger: error sending certificate: ` | -| `aggsender/aggsender.go` | 536 | `ERROR` | `error creating non accepted certificate: . Err: ` | -| `aggsender/aggsender.go` | 541 | `ERROR` | `error saving non accepted certificate: . Err: ` | - -**Recommended alarms**: alert on the `InError` status transition (`INFO` log at `cert_status_checker.go:187` matching `"changed status from.*to \[InError\]"`) and on the `ERROR` at `aggsender.go:365` (`"Certificate send trigger: error sending certificate"`). - -### 2. LER mismatch detected during certificate validation - -When the `aggsender` attempts to build and validate a new certificate, the local validator compares the certificate's `PrevLocalExitRoot` against the expected value. 
A mismatch surfaces as an error in the following paths: - -| File | Line | Level | Message | -|------|------|-------|---------| -| `aggsender/validator/validate_certificate.go` | 155 | `ERROR` (via `fmt.Errorf`) | `certificate PrevLocalExitRoot is not equal to previous certificate NewLocalExitRoot ` | -| `aggsender/validator/validate_certificate.go` | 196 | `ERROR` (via `fmt.Errorf`) | `first certificate must have correct starting PrevLocalExitRoot: , but got: ` | -| `aggsender/aggsender.go` | 432 | `WARN` | `error validating certificate locally: ` | -| `aggsender/aggsender.go` | 329 | `ERROR` | `error checking last certificate from agglayer: ` | - -**Recommended alarms**: alert on `WARN` at `aggsender.go:432` (`"error validating certificate locally"`) and on any log containing `"PrevLocalExitRoot"` and `"is not equal"` or `"but got"`. - -### 3. AggSender unable to build or send certificates - -When the `aggsender` repeatedly fails to build or submit a valid certificate (e.g., after a restart following a key compromise), it logs continuously on each retry cycle: +Run the tool when the bridge appears out of sync with the last settled AggLayer state, +for example: -| File | Line | Level | Message | -|------|------|-------|---------| -| `aggsender/aggsender.go` | 419 | `ERROR` (via `fmt.Errorf`) | `error getting certificate build params: ` | -| `aggsender/aggsender.go` | 428 | `ERROR` (via `fmt.Errorf`) | `error building certificate: ` | -| `aggsender/aggsender.go` | 460 | `ERROR` (via `fmt.Errorf`) | `error sending certificate: ` | -| `aggsender/aggsender.go` | 365 | `ERROR` | `Certificate send trigger: error sending certificate: ` | -| `aggsender/aggsender.go` | 359 | `ERROR` | `Certificate send trigger: error checking certificate status: ` | +- a certificate is rejected or transitions to `InError`, +- aggsender repeatedly fails to build or send certificates, +- an L2 reorg or aggsender issue is suspected to have settled the wrong LET state. 
-**Recommended alarms**: alert on repeated occurrences of `ERROR` at `aggsender.go:365` (`"Certificate send trigger: error sending certificate"`). A single occurrence may be transient; sustained repetition indicates a structural issue requiring investigation. +The tool determines whether there is actual divergence. Do not manually compare L1 and +L2 state unless you are debugging the tool itself. ---- - -**Root causes** that can trigger this divergence include: - -- **Compromised or buggy `aggsender`**: The `aggsender` private key is compromised or the component has a bug, causing it to craft and submit a certificate with leaves that do not correspond to actual L2 bridge events. -- **L2 network reorg (outpost networks)**: The L2 network reorgs after a certificate has already been settled on L1, meaning the block that contained certain bridge events no longer exists or has different contents. +## Prerequisites -## Diagnosis +Prepare an aggkit config file that includes the normal chain and AggLayer settings plus +the `BackwardForwardLET` section used by the tool. -Once detection signals indicate a divergence, the next step is to **determine the exact state on both sides** and identify which recovery case applies. This section provides concrete commands to gather all the data needed. 
+Required config inputs: -### Step 1: Query the AggLayer for settled state (L1 truth) +- `Common.L2RPC.URL` +- `BridgeL2Sync.BridgeAddr` +- `AgglayerClient` +- `BackwardForwardLET.BridgeServiceURL` +- `BackwardForwardLET.AggsenderRPCURL` +- `BackwardForwardLET.L2NetworkID` +- `BackwardForwardLET.GERRemoverKey` +- `BackwardForwardLET.EmergencyPauserKey` +- `BackwardForwardLET.EmergencyUnpauserKey` -The AggLayer's `GetNetworkInfo` gRPC call returns the last settled certificate details including the settled LER and leaf count: +Role expectations: -```bash -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetNetworkInfo -``` +- `GERRemoverKey` must be allowed to call `backwardLET` and `forwardLET`. +- `EmergencyPauserKey` must be allowed to activate emergency state. +- `EmergencyUnpauserKey` must be allowed to deactivate emergency state. -From the response, extract: -- `settled_ler` — the LER that L1 considers as truth -- `settled_let_leaf_count` — the deposit count at which L1 settled (this is the **L1 deposit count**) -- `settled_height` — the certificate height of the last settled certificate -- `settled_certificate_id` — the ID of that certificate +The tool handles emergency-mode activation and deactivation itself. There is no separate +manual pause/unpause step in the normal flow. 
-To get the full details of the last settled certificate: +For staged malicious-certificate drills used to create divergence intentionally: -```bash -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID, \"type\": \"LATEST_CERTIFICATE_REQUEST_TYPE_SETTLED\"}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetLatestCertificateHeader -``` +- stop aggkit/aggsender before crafting or sending malicious certificates so normal + certificate production does not race the drill, +- confirm there is no unrelated non-error pending certificate already occupying the next + height before sending the malicious cert, +- if the drill includes genuine L2 bridge creation, wait for bridge-service indexing before + expecting diagnosis or recovery to reason about those bridges, +- restart aggkit/aggsender only after all malicious certificates for that drill have been + submitted. -This returns a `CertificateHeader` with: -- `prev_local_exit_root` — what the AggLayer expected as the starting LER -- `new_local_exit_root` — the LER after applying this certificate's leaves -- `height` — certificate height -- `status` — should be `SETTLED` (5) +## Standard procedure -If there is also a pending (possibly InError) certificate: +Run the tool: ```bash -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID, \"type\": \"LATEST_CERTIFICATE_REQUEST_TYPE_PENDING\"}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetLatestCertificateHeader +backward-forward-let --cfg aggkit-config.toml ``` -If `status` is `IN_ERROR` (4), the `error` field will contain the rejection reason. - -### Step 2: Query the L2 bridge contract for current state +For non-interactive execution: ```bash -# Current deposit count on L2 -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL - -# Current LER (Merkle root of the LET) on L2 -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL - -# Is the bridge in emergency state? 
-cast call $BRIDGE_L2_ADDR "isEmergencyState()(bool)" --rpc-url $L2_RPC_URL - -# Network ID (sanity check) -cast call $BRIDGE_L2_ADDR "networkID()(uint32)" --rpc-url $L2_RPC_URL +backward-forward-let --cfg aggkit-config.toml --yes ``` -### Step 3: Query the bridge service for sync status +What happens next: -The bridge service exposes a sync status endpoint that compares on-chain deposit counts with its local database: +1. The tool validates connectivity to the bridge service, L2 RPC, AggLayer, and aggsender. +2. It diagnoses the current state and prints one of: + - `NoDivergence` + - a recovery case with the divergence point and affected leaves + - a missing-certificate report if aggsender cannot provide bridge exits +3. If recovery is needed, it prints the exact recovery plan. +4. It asks for confirmation unless `--yes` is set. +5. It executes the required on-chain steps and verifies the resulting deposit count and LER. -```bash -curl -s "$BRIDGE_SERVICE_URL/sync-status" | jq . -``` +Operational notes from staging: -The response includes: -- `l2_info.contract_deposit_count` — on-chain deposit count -- `l2_info.synchronized_deposit_count` — how far the bridge service has synced -- `l2_info.is_synced` — whether the syncer is caught up +- A just-created genuine L2 bridge is not usable by the tool until the bridge service has + indexed it. If diagnosis says a deposit is not indexed yet, wait for bridge-service + catch-up instead of improvising a manual recovery. +- In staged Case 3 drills, the state after only the first malicious certificate settles is + still effectively Case 1. Final Case 3 classification only appears after the second + malicious certificate also settles. -### Step 4: Compare L1 vs L2 and determine the case +Recovery behavior by case: -Save the key values: +- Case 1 and Case 3: `ForwardLET` only. +- Case 2 and Case 4: `BackwardLET`, then `ForwardLET` for divergent settled leaves, then a + second `ForwardLET` when extra real L2 bridges must be replayed.
-```bash -# From AggLayer (Step 1) -L1_SETTLED_LER="" -L1_DEPOSIT_COUNT="" - -# From L2 contract (Step 2) -L2_LER=$(cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL) -L2_DEPOSIT_COUNT=$(cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL) - -echo "L1 settled LER: $L1_SETTLED_LER" -echo "L1 settled deposit count: $L1_DEPOSIT_COUNT" -echo "L2 current LER: $L2_LER" -echo "L2 current deposit count: $L2_DEPOSIT_COUNT" -``` +## Expected outcomes -**Important**: `L2_LER != L1_SETTLED_LER` does **not** by itself indicate divergence. Under normal operation L2 is ahead of L1 (the `aggsender` posts certificates periodically), so `L2_DEPOSIT_COUNT > L1_DEPOSIT_COUNT` and a different current root is perfectly expected. +- If the tool reports `NoDivergence`, no action is required. +- If the tool completes recovery successfully, the L2 bridge is reconciled to the settled + AggLayer state and emergency mode is turned off before exit. +- If the tool reports missing certificate bridge exits, stop and use the fallback flow + below. +- For staged Case 2 or Case 4 drills, if recovery replays genuine L2 bridges while + aggsender is still stopped, the first post-recovery rerun may still show divergence. + In that situation, restart aggsender, wait for the honest follow-up certificate(s) to + settle, then rerun until the tool reports `NoDivergence`. -The key validation is to check whether `L1_SETTLED_LER` **exists in L2's history** — i.e., whether L2's tree ever had that root at `L1_DEPOSIT_COUNT` deposits. +## Fallback when aggsender bridge exits are unavailable -#### Quick checks (no archive node needed) +If aggsender RPC cannot supply bridge exits for one or more settled certificate heights, +the tool prints an actionable report listing the missing heights and any certificate IDs +it could resolve automatically. -```bash -# If L2 has fewer deposits than L1 settled, divergence is certain. -# L1 should never settle leaves that don't exist on L2. 
-if [ "$L2_DEPOSIT_COUNT" -lt "$L1_DEPOSIT_COUNT" ]; then - echo "DIVERGENCE: L1 settled $L1_DEPOSIT_COUNT deposits but L2 only has $L2_DEPOSIT_COUNT" -fi - -# If deposit counts match, a simple root comparison suffices. -if [ "$L2_DEPOSIT_COUNT" -eq "$L1_DEPOSIT_COUNT" ]; then - if [ "$L2_LER" == "$L1_SETTLED_LER" ]; then - echo "No divergence — roots match at same deposit count" - else - echo "DIVERGENCE: same deposit count ($L2_DEPOSIT_COUNT) but different roots" - fi -fi -``` - -#### When L2 is ahead (`L2_DEPOSIT_COUNT > L1_DEPOSIT_COUNT`) - -L2 being ahead is normal. To confirm divergence, verify that `L1_SETTLED_LER` matches the L2 tree's historical root at `L1_DEPOSIT_COUNT`. This requires an **archive node** for the L2 RPC. +When aggsender is intentionally stopped for a fallback drill, this missing range may span +the full settled history from height `0` through the latest settled certificate. That is +expected; build an override file for the heights the tool needs and rerun with that data. -Use the bridge service to find the block boundary, then query the historical root: +Re-run the tool with an override file once you have the missing bridge exits: ```bash -# deposit_count in the bridge service is 0-indexed. -# L1_DEPOSIT_COUNT is the total leaf count, so the last settled deposit is at index L1_DEPOSIT_COUNT - 1. -# The first deposit AFTER the settled set is at index L1_DEPOSIT_COUNT. -FIRST_POST_SETTLE=$(curl -s "$BRIDGE_SERVICE_URL/bridge-by-deposit-count?network_id=$NETWORK_ID&deposit_count=$L1_DEPOSIT_COUNT" | jq -r '.block_num') - -if [ "$FIRST_POST_SETTLE" != "null" ] && [ -n "$FIRST_POST_SETTLE" ]; then - # Read the L2 root at the block BEFORE the first post-settlement deposit. - # At this point, L2 should have had exactly L1_DEPOSIT_COUNT leaves. 
- HISTORY_BLOCK=$((FIRST_POST_SETTLE - 1)) - L2_HISTORICAL_LER=$(cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" \ - --rpc-url $L2_RPC_URL --block $HISTORY_BLOCK) - - echo "L2 historical LER at block $HISTORY_BLOCK: $L2_HISTORICAL_LER" - echo "L1 settled LER: $L1_SETTLED_LER" - - if [ "$L2_HISTORICAL_LER" == "$L1_SETTLED_LER" ]; then - echo "No divergence — L1 settled LER exists in L2 history" - else - echo "DIVERGENCE CONFIRMED — L1 settled LER does NOT match L2 tree at deposit count $L1_DEPOSIT_COUNT" - fi -else - echo "Bridge at deposit_count=$L1_DEPOSIT_COUNT not found on L2 — verify bridge service sync status" -fi +backward-forward-let --cfg aggkit-config.toml \ + --cert-exits-file certificate_exits_override.json ``` -> **Note**: The archive-node query above assumes the first deposit after the settled set is in a different block than the last settled deposit. If multiple deposits land in the same block, the block boundary may not be exact. In that case, use the block of the last settled deposit (`deposit_count = L1_DEPOSIT_COUNT - 1`) and verify the deposit count at that block: -> ```bash -> LAST_SETTLED_BLOCK=$(curl -s "$BRIDGE_SERVICE_URL/bridge-by-deposit-count?network_id=$NETWORK_ID&deposit_count=$((L1_DEPOSIT_COUNT - 1))" | jq -r '.block_num') -> DEPOSIT_AT_BLOCK=$(cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL --block $LAST_SETTLED_BLOCK) -> # If DEPOSIT_AT_BLOCK == L1_DEPOSIT_COUNT, the root at this block is the one to compare. -> # If DEPOSIT_AT_BLOCK > L1_DEPOSIT_COUNT, more deposits landed in the same block — you'll need -> # to trace the transaction to get the intermediate root. -> ``` +The override file is only a fallback for missing certificate exits. Diagnosis and +recovery still stay tool-driven. -#### Summary +The same override file can also be supplied to `backward-forward-let craft-cert` when a +later malicious certificate must be crafted while aggsender is still unavailable. 
-| Condition | Result | -|-----------|--------| -| `L2_DEPOSIT_COUNT < L1_DEPOSIT_COUNT` | **Divergence** — L1 settled leaves that don't exist on L2 | -| `L2_DEPOSIT_COUNT == L1_DEPOSIT_COUNT` and `L2_LER == L1_SETTLED_LER` | **No divergence** | -| `L2_DEPOSIT_COUNT == L1_DEPOSIT_COUNT` and `L2_LER != L1_SETTLED_LER` | **Divergence** — same count, different roots | -| `L2_DEPOSIT_COUNT > L1_DEPOSIT_COUNT` and L1_SETTLED_LER **found** in L2 history | **No divergence** — L2 is simply ahead | -| `L2_DEPOSIT_COUNT > L1_DEPOSIT_COUNT` and L1_SETTLED_LER **NOT found** in L2 history | **Divergence** — L1 settled a root that L2 never had | +For the detailed fallback procedure, including AggLayer admin/debug endpoint +prerequisites and override-file handling examples, see +[`tools/backward_forward_let/RECOVERY_PROCEDURE.md`](../tools/backward_forward_let/RECOVERY_PROCEDURE.md). -### Step 5: List the L2 bridges (leaves) from the divergence point +### Step 1: fetch missing certificates from the AggLayer admin API -To understand which bridges exist on L2 after the last matching point, query the bridge service for each deposit count from the divergence point onwards: +For each certificate ID reported by the tool, call `admin_getCertificate`: ```bash -# Get the bridge at a specific deposit count on L2 -# Repeat for each deposit count from (last_matching_count + 1) to L2_DEPOSIT_COUNT -DEPOSIT_IDX=3 # example: first divergent position -curl -s "$BRIDGE_SERVICE_URL/bridge-by-deposit-count?network_id=$NETWORK_ID&deposit_count=$DEPOSIT_IDX" | jq . 
-``` - -The response contains the full leaf data for that bridge: -- `leaf_type` (0=asset, 1=message) -- `origin_network` -- `origin_address` -- `destination_network` -- `destination_address` -- `amount` -- `metadata` - -Loop through all positions to build the list of L2 leaves: +AGGLAYER_ADMIN="http://localhost:4446" +CERT_ID="0xabc123...def456" -```bash -# Collect all L2 bridges from divergence point to current deposit count -DIVERGENCE_POINT=2 # last matching deposit count -for i in $(seq $((DIVERGENCE_POINT + 1)) $L2_DEPOSIT_COUNT); do - echo "=== Deposit $i ===" - curl -s "$BRIDGE_SERVICE_URL/bridge-by-deposit-count?network_id=$NETWORK_ID&deposit_count=$i" | jq '{ - deposit_count, - leaf_type, - origin_network, - origin_address, - destination_network, - destination_address, - amount, - metadata - }' -done +curl -s -X POST "$AGGLAYER_ADMIN" \ + -H "Content-Type: application/json" \ + -d "{\"jsonrpc\":\"2.0\",\"method\":\"admin_getCertificate\",\"params\":[\"$CERT_ID\"],\"id\":1}" ``` -### Step 6: List the L1-settled leaves (divergent leaves) - -The divergent leaves (BX, BY, ...) are the ones that were included in certificates settled on L1 but do not exist on L2. These leaves are part of the `bridge_exits` field of the settled certificates. - -The AggLayer gRPC API only exposes certificate **headers** (`GetCertificateHeader`), not full certificate bodies — it does not return the individual bridge exits. Retrieving the actual leaf data requires one of the following options. - -#### Option 1: aggsender certificate API (preferred) +Use `result[0].bridge_exits` from the response. -The `aggsender` stores the full body of every certificate it submits, including the `bridge_exits` array. A dedicated endpoint is being added to the `aggsender` to expose this data. It will be available before this runbook is released. 
+If the tool reports `CertID: UNKNOWN`, the AggLayer admin must first resolve that +certificate ID from AggLayer state before you can fetch its `bridge_exits`. -The endpoint will accept a certificate ID (or height) and return the full list of bridge exits for that certificate, including the leaf data needed for `forwardLET`: +### Step 2: build the override file -```bash -# Retrieve bridge exits for a specific certificate height -# The aggsender API base URL depends on your deployment configuration -AGGSENDER_API_URL="" -CERT_HEIGHT="" - -curl -s "$AGGSENDER_API_URL/certificate/$CERT_HEIGHT/bridge-exits" | jq . -``` +The override file must use Go JSON field names for `BridgeExit` objects: -The response will contain an array of bridge exit objects, each with: -- `leaf_type` (0=asset, 1=message) -- `origin_network` -- `origin_token_address` -- `dest_network` -- `dest_address` -- `amount` -- `metadata` - -These map directly to the `LeafData` fields required by `forwardLET`. - -> **Prerequisite**: The aggsender must be the same instance that submitted the divergent certificate (its DB holds that certificate's data). If the aggsender was replaced or its database was lost, fall back to Option 2. - -#### Option 2: contact the AggLayer node admin (fallback) - -If Option 1 is unavailable (aggsender DB lost, different aggsender instance, or the API is unreachable), contact the operator of the AggLayer node and request the full certificate body for the divergent certificate ID. - -Provide them with the certificate ID obtained in Step 1: - -```bash -# Certificate ID from GetNetworkInfo (settled_certificate_id) -echo "Certificate ID: $CERT_ID" -echo "Network ID: $NETWORK_ID" -echo "Height: " -``` - -The AggLayer node operator can retrieve the full certificate body — including all `bridge_exits` — from their internal storage and share the leaf data needed to construct the `forwardLET` call. 
- -### Summary: determining the recovery case - -After collecting the data above: - -| L2 has extra leaves beyond divergence? | L1 settled extra leaves beyond divergence? | Case | -|----------------------------------------|-------------------------------------------|------| -| No | No (single divergent leaf) | **Case 1** — forwardLET only | -| Yes | No (single divergent leaf) | **Case 2** — backwardLET then forwardLET | -| No | Yes (multiple divergent leaves) | **Case 3** — forwardLET only (multiple leaves) | -| Yes | Yes (multiple divergent leaves) | **Case 4** — backwardLET then forwardLET | - -## Recovery - -### Using the tool - -A dedicated tool to automate the recovery process is **under development**. Once available, this tool will: - -- Query the AggLayer node for the expected LER on L1 -- Compare it against the current LET state on L2 -- Determine the required sequence of `backwardLET` and `forwardLET` calls -- Compute the necessary Merkle proofs, frontiers, and leaf data -- Execute the smart contract calls in the correct order - -Until the tool is available, recovery must be performed manually as described below. 
- -### Contract function signatures reference - -Before proceeding, here are the exact Solidity function signatures (from [`AgglayerBridgeL2.sol` v12.2.0](https://github.com/agglayer/agglayer-contracts/blob/v12.2.0/contracts/sovereignChains/AgglayerBridgeL2.sol)): - -```solidity -// Roll the LET backward to a previous state -// Modifiers: onlyGlobalExitRootRemover, ifEmergencyState -function backwardLET( - uint256 newDepositCount, - bytes32[32] calldata newFrontier, - bytes32 nextLeaf, - bytes32[32] calldata proof -) external virtual onlyGlobalExitRootRemover ifEmergencyState; - -// Advance the LET by adding new leaves in bulk -// Modifiers: onlyGlobalExitRootRemover, ifEmergencyState -function forwardLET( - LeafData[] calldata newLeaves, - bytes32 expectedLER -) external virtual onlyGlobalExitRootRemover ifEmergencyState; - -struct LeafData { - uint8 leafType; // 0 = asset, 1 = message - uint32 originNetwork; - address originAddress; - uint32 destinationNetwork; - address destinationAddress; - uint256 amount; - bytes metadata; +```json +{ + "network_id": 1, + "description": "Extracted from agglayer admin_getCertificate", + "heights": { + "3": [ + { + "leaf_type": 0, + "token_info": { + "origin_network": 0, + "origin_token_address": "0x0000000000000000000000000000000000000000" + }, + "dest_network": 1, + "dest_address": "0xAbCd...1234", + "amount": "1000000000000000000", + "metadata": null + } + ] + } } - -// Emergency state management -// Modifier: onlyEmergencyBridgePauser -function activateEmergencyState() external onlyEmergencyBridgePauser; - -// Modifier: onlyEmergencyBridgeUnpauser -function deactivateEmergencyState() external onlyEmergencyBridgeUnpauser; -``` - -### Manually - -The manual recovery process follows these steps. Each step includes the exact CLI commands to execute. 
- -#### Step 1: Stop the `aggsender` - -Before performing any recovery operations, stop the `aggsender` to prevent it from interfering (e.g., attempting to send certificates while the bridge is in emergency mode). - -```bash -# Stop the aggsender process/container. -# The exact command depends on your deployment (systemd, docker, kubernetes, etc.) -# Example for docker: -docker stop aggsender - -# Example for systemd: -sudo systemctl stop aggsender -``` - -#### Step 2: Activate emergency mode - -Call `activateEmergencyState` on the bridge contract. This is a prerequisite for both `backwardLET` and `forwardLET`. - -```bash -# Verify emergency state is NOT already active -cast call $BRIDGE_L2_ADDR "isEmergencyState()(bool)" --rpc-url $L2_RPC_URL - -# Activate emergency state (requires emergencyBridgePauser key) -cast send $BRIDGE_L2_ADDR "activateEmergencyState()" \ - --private-key $EMERGENCY_PAUSER_PK \ - --rpc-url $L2_RPC_URL - -# Confirm activation -cast call $BRIDGE_L2_ADDR "isEmergencyState()(bool)" --rpc-url $L2_RPC_URL -# Expected: true -``` - -#### Step 3: Roll back the LET if needed (`backwardLET`) - -This step is only needed if L2 has extra leaves beyond the divergence point (**Cases 2 and 4**). If only `forwardLET` is needed (**Cases 1 and 3**), skip to Step 4. - -The `backwardLET` function requires: -- `newDepositCount` — the target deposit count to roll back to (the divergence point) -- `newFrontier` — 32-element Merkle tree frontier array at the target deposit count -- `nextLeaf` — the leaf hash at position `newDepositCount` in the current tree (proof of inclusion) -- `proof` — Merkle proof that `nextLeaf` exists at position `newDepositCount` - -> **Computing `newFrontier`, `nextLeaf`, and `proof`**: These values require off-chain computation from the Merkle tree state. The recovery tool (when available) will compute these automatically. 
For manual computation, you need access to the full tree state (all leaves up to the current deposit count) to generate the frontier at the target count, the leaf hash at the boundary position, and a Merkle inclusion proof. - -```bash -# Example: roll back from deposit count 4 to deposit count 2 -# NEW_DEPOSIT_COUNT, NEW_FRONTIER, NEXT_LEAF, and PROOF must be computed off-chain -NEW_DEPOSIT_COUNT=2 -NEW_FRONTIER="[0x...,0x...,...]" # 32-element bytes32 array -NEXT_LEAF="0x..." # leaf hash at position newDepositCount -PROOF="[0x...,0x...,...]" # 32-element bytes32 Merkle proof - -cast send $BRIDGE_L2_ADDR \ - "backwardLET(uint256,bytes32[32],bytes32,bytes32[32])" \ - $NEW_DEPOSIT_COUNT \ - "$NEW_FRONTIER" \ - $NEXT_LEAF \ - "$PROOF" \ - --private-key $GER_REMOVER_PK \ - --rpc-url $L2_RPC_URL - -# Verify the rollback -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL -# Expected: 2 -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL -# Should match the LER at deposit count 2 -``` - -#### Step 4: Advance the LET (`forwardLET`) - -Call `forwardLET` to add the required leaves. This includes: -- The divergent leaf(s) settled on L1 (BX, BY, ...) -- If a `backwardLET` was performed in Step 3, the legitimate L2 bridges that were rolled back (B3, B4, ...) - -The leaves must be passed as an array of `LeafData` structs **in the correct order**: divergent leaves first, then the re-added legitimate L2 bridges. - -The `expectedLER` is the expected Merkle root after all leaves are inserted. It acts as a health check — if the computed root doesn't match, the transaction reverts. - -```bash -# Build the leaf data array. -# Each leaf is a tuple: (leafType, originNetwork, originAddress, destinationNetwork, destinationAddress, amount, metadata) -# -# Example for Case 2: insert BX (divergent), then B3 and B4 (legitimate) -# The leaf data comes from the diagnosis phase (Step 5 and Step 6 above) - -EXPECTED_LER="0x..." 
# the expected LER after all leaves are inserted - -cast send $BRIDGE_L2_ADDR \ - "forwardLET((uint8,uint32,address,uint32,address,uint256,bytes)[],bytes32)" \ - "[(0,1,0xOrigAddr1,2,0xDestAddr1,1000000000000000000,0x),(0,1,0xOrigAddr2,3,0xDestAddr2,2000000000000000000,0x),(0,1,0xOrigAddr3,3,0xDestAddr3,500000000000000000,0x)]" \ - $EXPECTED_LER \ - --private-key $GER_REMOVER_PK \ - --rpc-url $L2_RPC_URL - -# Verify the new state -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL -# The root should match EXPECTED_LER -``` - -**Computing `expectedLER`**: This is the Merkle root you expect after inserting all the leaves. It must be computed off-chain from the full leaf set. For **Cases 1 and 3** (forward-only), the expected LER after inserting all missing leaves should match the L1 settled LER if you're inserting exactly the leaves that were settled. For **Cases 2 and 4** (backward + forward), the expected LER must account for both the divergent leaves and the re-added legitimate leaves. - -#### Step 5: Deactivate emergency mode - -```bash -# Deactivate emergency state (requires emergencyBridgeUnpauser key) -cast send $BRIDGE_L2_ADDR "deactivateEmergencyState()" \ - --private-key $EMERGENCY_UNPAUSER_PK \ - --rpc-url $L2_RPC_URL - -# Confirm deactivation -cast call $BRIDGE_L2_ADDR "isEmergencyState()(bool)" --rpc-url $L2_RPC_URL -# Expected: false ``` -#### Step 6: Rebalance the chain (if needed) +Constraints: -The bridge will be **undercollateralized** by the sum of amounts of all divergent leaves (BX, BY, ...). The AggLayer tracks a Local Balance Tree (LBT) for each chain, and if the LBT shows a negative balance, the next certificate will be rejected. +- `network_id` must match the affected L2 network. +- `heights` keys are certificate heights as decimal strings. +- `amount` is a decimal string. +- `metadata` is `null` or base64-encoded bytes. 
+- Use `dest_network` and `dest_address`, not Rust serde field names. -Check whether rebalancing is urgent by computing the total amount of divergent leaves: +### Step 3: rerun the tool ```bash -# Sum of amounts of all divergent leaves (BX, BY, ...) -# If this amount is significant, rebalancing must happen BEFORE starting the aggsender. - -# Rebalancing steps: -# 1. Bridge the required amount from another network (LX) into this chain -# 2. Claim the bridge on L2 -# 3. Burn the claimed amount on L2 -# -# These are standard bridge operations and depend on the specific token and network involved. +backward-forward-let --cfg aggkit-config.toml \ + --cert-exits-file certificate_exits_override.json ``` -#### Step 7: Start the `aggsender` - -Once the LET is corrected and rebalancing is complete (if needed), restart the `aggsender`: - -```bash -# Start the aggsender process/container -# Example for docker: -docker start aggsender - -# Example for systemd: -sudo systemctl start aggsender -``` - -After starting, the `aggsender` must craft a certificate covering the block range that includes the `BackwardLET` and `ForwardLET` events. Monitor its logs to verify: - -```bash -# Watch for successful certificate submission -# Look for log lines indicating successful certificate send -# and absence of the error patterns listed in the Detection section -``` - -The `aggsender` handles `BackwardLET` events (removing leaves from its internal DB) and `ForwardLET` events (adding leaves to its internal DB) automatically. - -#### Post-recovery verification - -After the `aggsender` resumes and submits a new certificate, verify everything is in sync: - -```bash -# 1. Check that the latest certificate is settled (not InError) -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID, \"type\": \"LATEST_CERTIFICATE_REQUEST_TYPE_SETTLED\"}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetLatestCertificateHeader - -# 2. 
Verify L2 LER matches what AggLayer expects -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetNetworkInfo - -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL -# These should be consistent - -# 3. Check bridge service sync status -curl -s "$BRIDGE_SERVICE_URL/sync-status" | jq . - -# 4. Verify no pending InError certificates -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID, \"type\": \"LATEST_CERTIFICATE_REQUEST_TYPE_PENDING\"}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetLatestCertificateHeader -``` - -### Cases - -The key factor determining the recovery steps is not just the root cause of the divergence, but the **combination of events that occurred after the LET diverged**. Specifically: - -- Did further bridges occur on L2 after the divergence point? -- Did further settlements occur on L1 after the first invalid one? - -The following scenarios use this notation: - -``` -L2: B1 -> LET_1, B2 -> LET_2, B3 -> LET_3, B4 -> LET_4 -L1: B1 -> LET_1, B2 -> LET_2, BX -> LET_X - ^ divergence point -``` - -Where `B1..B4` are bridge events, `BX` is a divergent leaf (settled on L1 but not matching L2), and `LET_N` is the LET root after leaf N. - ---- - -#### Case 1: Divergence with no further L2 bridges and no further L1 settlements - -**Scenario**: A single divergent leaf was settled on L1, no additional bridges have occurred on L2 since, and no further settlements have been made on L1. 
- -``` -L2: B1 -> LET_1, B2 -> LET_2 -L1: B1 -> LET_1, B2 -> LET_2, BX -> LET_X -``` - -**Diagnosis check**: - -```bash -# Confirm: L2 deposit count == L1 divergence point (e.g., 2) -# L1 settled deposit count == divergence point + number of divergent leaves (e.g., 3) -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL -# Expected: 2 - -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetNetworkInfo -# settled_let_leaf_count expected: 3 -``` - -**Recovery steps**: - -```bash -# 1. Stop the aggsender -# 2. Activate emergency state -cast send $BRIDGE_L2_ADDR "activateEmergencyState()" \ - --private-key $EMERGENCY_PAUSER_PK --rpc-url $L2_RPC_URL - -# 3. forwardLET — add BX to match L1 -# BX leaf data must be obtained from the settled certificate (see Diagnosis Step 6) -cast send $BRIDGE_L2_ADDR \ - "forwardLET((uint8,uint32,address,uint32,address,uint256,bytes)[],bytes32)" \ - "[(BX_LEAF_TYPE,BX_ORIGIN_NET,BX_ORIGIN_ADDR,BX_DEST_NET,BX_DEST_ADDR,BX_AMOUNT,BX_METADATA)]" \ - $LET_X \ - --private-key $GER_REMOVER_PK --rpc-url $L2_RPC_URL - -# 4. Verify -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL # Expected: 3 -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL # Expected: LET_X - -# 5. Deactivate emergency state -cast send $BRIDGE_L2_ADDR "deactivateEmergencyState()" \ - --private-key $EMERGENCY_UNPAUSER_PK --rpc-url $L2_RPC_URL - -# 6. (Optional) Re-collateralize, then start the aggsender -``` - -This is the simplest case: no backward operation is needed since L2 has no extra leaves beyond the divergence point. - -**Collateralization**: The bridge is **undercollateralized** by `amount(BX)` — L1 has credited those assets as having left L2, but they were never actually burned on L2. - -**Optional re-collateralization steps**: - -1. Bridge `amount(BX)` from another network into this chain -2. Claim the bridged funds on L2 -3. 
Burn the claimed amount on L2 - -This realigns the LBT on L2 with the LBT tracked by the AggLayer node. If the amount is significant, this must be done before starting the `aggsender` (step 6 above), as the AggLayer will reject the next certificate if the LBT shows a negative balance. - ---- - -#### Case 2: Divergence with further L2 bridges but no further L1 settlements - -**Scenario**: After the divergent leaf was settled on L1, additional bridges happened on L2 (but no further settlements occurred on L1). - -``` -L2: B1 -> LET_1, B2 -> LET_2, B3 -> LET_3, B4 -> LET_4 -L1: B1 -> LET_1, B2 -> LET_2, BX -> LET_X -``` - -L2 has leaves B3 and B4 that were added after the divergence point. These must be removed, the divergent leaf inserted, and then the legitimate leaves re-added. - -**Diagnosis check**: - -```bash -# L2 has more deposits than the divergence point -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL -# Expected: 4 (divergence point 2 + 2 extra L2 bridges) - -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetNetworkInfo -# settled_let_leaf_count expected: 3 (divergence point 2 + 1 divergent leaf) - -# Collect leaf data for B3 and B4 (the L2 bridges to re-add) -curl -s "$BRIDGE_SERVICE_URL/bridge-by-deposit-count?network_id=$NETWORK_ID&deposit_count=3" | jq . -curl -s "$BRIDGE_SERVICE_URL/bridge-by-deposit-count?network_id=$NETWORK_ID&deposit_count=4" | jq . -``` - -**Recovery steps**: - -```bash -# 1. Stop the aggsender -# 2. Activate emergency state -cast send $BRIDGE_L2_ADDR "activateEmergencyState()" \ - --private-key $EMERGENCY_PAUSER_PK --rpc-url $L2_RPC_URL - -# 3. 
backwardLET — roll back to deposit count 2 (removing B3 and B4) -# NEW_FRONTIER, NEXT_LEAF, PROOF must be computed off-chain -cast send $BRIDGE_L2_ADDR \ - "backwardLET(uint256,bytes32[32],bytes32,bytes32[32])" \ - 2 \ - "$NEW_FRONTIER" \ - $NEXT_LEAF \ - "$PROOF" \ - --private-key $GER_REMOVER_PK --rpc-url $L2_RPC_URL - -# Verify rollback -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL # Expected: 2 - -# 4. forwardLET — add BX, then B3, B4 in a single call -cast send $BRIDGE_L2_ADDR \ - "forwardLET((uint8,uint32,address,uint32,address,uint256,bytes)[],bytes32)" \ - "[(BX_LEAF...),(B3_LEAF...),(B4_LEAF...)]" \ - $EXPECTED_LER \ - --private-key $GER_REMOVER_PK --rpc-url $L2_RPC_URL - -# Verify -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL # Expected: 5 -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL # Expected: EXPECTED_LER - -# 5. Deactivate emergency state -cast send $BRIDGE_L2_ADDR "deactivateEmergencyState()" \ - --private-key $EMERGENCY_UNPAUSER_PK --rpc-url $L2_RPC_URL - -# 6. (Optional) Re-collateralize, then start the aggsender -``` - -After recovery, the L2 LET will contain: B1, B2, BX, B3, B4 — with the first three matching L1's settled state. - -**Collateralization**: Same exposure as Case 1 — the bridge is **undercollateralized** by `amount(BX)`. The legitimate re-added leaves (B3, B4) correspond to real L2 events and do not contribute to undercollateralization. - -**Optional re-collateralization steps**: - -1. Bridge `amount(BX)` from another network into this chain -2. Claim the bridged funds on L2 -3. Burn the claimed amount on L2 - -This must be done before starting the `aggsender` if the resulting negative LBT balance would cause the next certificate to be rejected. 
- ---- - -#### Case 3: Divergence with no further L2 bridges but continued L1 settlements - -**Scenario**: Multiple settlements have occurred on L1 after the first divergent one, but no additional bridges happened on L2. - -``` -L2: B1 -> LET_1, B2 -> LET_2 -L1: B1 -> LET_1, B2 -> LET_2, BX -> LET_X, BY -> LET_Y -``` - -**Diagnosis check**: - -```bash -# L2 deposit count == divergence point -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL -# Expected: 2 - -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetNetworkInfo -# settled_let_leaf_count expected: 4 (divergence point 2 + 2 divergent leaves) -``` - -**Recovery steps**: - -```bash -# 1. Stop the aggsender -# 2. Activate emergency state -cast send $BRIDGE_L2_ADDR "activateEmergencyState()" \ - --private-key $EMERGENCY_PAUSER_PK --rpc-url $L2_RPC_URL - -# 3. forwardLET — add BX and BY to match L1 -cast send $BRIDGE_L2_ADDR \ - "forwardLET((uint8,uint32,address,uint32,address,uint256,bytes)[],bytes32)" \ - "[(BX_LEAF...),(BY_LEAF...)]" \ - $LET_Y \ - --private-key $GER_REMOVER_PK --rpc-url $L2_RPC_URL - -# Verify -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL # Expected: 4 -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL # Expected: LET_Y - -# 4. Deactivate emergency state -cast send $BRIDGE_L2_ADDR "deactivateEmergencyState()" \ - --private-key $EMERGENCY_UNPAUSER_PK --rpc-url $L2_RPC_URL - -# 5. Re-collateralize (URGENT), then start the aggsender -``` - -No backward operation is needed since L2 has no extra leaves. The `forwardLET` call can batch-insert all missing leaves in a single transaction. - -**Collateralization**: The bridge is **undercollateralized** by `amount(BX) + amount(BY)`. This is the most collateralization-sensitive case among those with no backward step, as multiple bad settlements have accumulated. - -**Optional re-collateralization steps**: - -1. 
Bridge `amount(BX) + amount(BY)` from another network into this chain -2. Claim the bridged funds on L2 -3. Burn the claimed amount on L2 - -This is **urgent** — the AggLayer will reject the next certificate if the LBT shows a negative balance, so this must be done before starting the `aggsender`. - ---- - -#### Case 4: Divergence with both further L2 bridges and continued L1 settlements - -**Scenario**: This is the most complex case. After the divergence, both additional bridges occurred on L2 and additional settlements were made on L1. - -``` -L2: B1 -> LET_1, B2 -> LET_2, B3 -> LET_3, B4 -> LET_4 -L1: B1 -> LET_1, B2 -> LET_2, BX -> LET_X, BY -> LET_Y -``` - -L2 has extra leaves (B3, B4) and L1 has settled additional leaves (BX, BY) beyond the divergence point. - -**Diagnosis check**: - -```bash -# L2 has more deposits than the divergence point -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL -# Expected: 4 - -grpcurl -plaintext -d "{\"network_id\": $NETWORK_ID}" \ - $AGGLAYER_GRPC \ - agglayer.node.v1.NodeStateService/GetNetworkInfo -# settled_let_leaf_count expected: 4 (2 matching + 2 divergent) - -# The LERs will differ -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL -# L2 root != L1 settled_ler, even though deposit counts may match - -# Collect leaf data for B3 and B4 -curl -s "$BRIDGE_SERVICE_URL/bridge-by-deposit-count?network_id=$NETWORK_ID&deposit_count=3" | jq . -curl -s "$BRIDGE_SERVICE_URL/bridge-by-deposit-count?network_id=$NETWORK_ID&deposit_count=4" | jq . -``` - -**Recovery steps**: - -```bash -# 1. Stop the aggsender -# 2. Activate emergency state -cast send $BRIDGE_L2_ADDR "activateEmergencyState()" \ - --private-key $EMERGENCY_PAUSER_PK --rpc-url $L2_RPC_URL - -# 3. 
backwardLET — roll back to deposit count 2 (removing B3 and B4) -cast send $BRIDGE_L2_ADDR \ - "backwardLET(uint256,bytes32[32],bytes32,bytes32[32])" \ - 2 \ - "$NEW_FRONTIER" \ - $NEXT_LEAF \ - "$PROOF" \ - --private-key $GER_REMOVER_PK --rpc-url $L2_RPC_URL - -# Verify rollback -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL # Expected: 2 - -# 4. forwardLET — add BX, BY (divergent), then B3, B4 (legitimate) in a single call -cast send $BRIDGE_L2_ADDR \ - "forwardLET((uint8,uint32,address,uint32,address,uint256,bytes)[],bytes32)" \ - "[(BX_LEAF...),(BY_LEAF...),(B3_LEAF...),(B4_LEAF...)]" \ - $EXPECTED_LER \ - --private-key $GER_REMOVER_PK --rpc-url $L2_RPC_URL - -# Verify -cast call $BRIDGE_L2_ADDR "depositCount()(uint256)" --rpc-url $L2_RPC_URL # Expected: 6 -cast call $BRIDGE_L2_ADDR "getRoot()(bytes32)" --rpc-url $L2_RPC_URL # Expected: EXPECTED_LER - -# 5. Deactivate emergency state -cast send $BRIDGE_L2_ADDR "deactivateEmergencyState()" \ - --private-key $EMERGENCY_UNPAUSER_PK --rpc-url $L2_RPC_URL - -# 6. Re-collateralize (URGENT), then start the aggsender -``` - -After recovery, the L2 LET will contain: B1, B2, BX, BY, B3, B4 — with the first four matching L1's settled state. - -**Collateralization**: The bridge is **undercollateralized** by `amount(BX) + amount(BY)`. This is the worst-case scenario: multiple bad settlements on L1 combined with legitimate L2 bridge activity. The legitimate re-added leaves (B3, B4) correspond to real L2 events and do not add to the undercollateralization. - -**Optional re-collateralization steps**: - -1. Bridge `amount(BX) + amount(BY)` from another network into this chain -2. Claim the bridged funds on L2 -3. Burn the claimed amount on L2 - -This must be done before starting the `aggsender`. Given that multiple invalid settlements have occurred, this is the case where the negative LBT balance is most likely to block the very next certificate. 
- ---- - -#### Important considerations across all cases -- **Re-collateralization**: The bridge will always be undercollateralized after recovery by the sum of amounts of all divergent leaves. Re-collateralization (bridge from another chain -> claim on L2 -> burn) must be completed before starting the `aggsender` whenever the resulting negative LBT balance would cause the next certificate to be rejected. See each case above for the specific amounts involved. -- **Stop aggsender first**: Always stop the `aggsender` before starting any recovery operations and only start it again after everything is complete (including deactivating emergency mode and re-collateralizing if needed). -- **Certificate crafting**: After recovery, the `aggsender` must craft a certificate that covers the block range containing all the `BackwardLET` and `ForwardLET` events. The certificate's initial block must be correct and all events in the range must be included. -- **Event parsing**: The `aggsender` must correctly handle `BackwardLET` events (removing leaves from its DB) and `ForwardLET` events (adding leaves to its DB) to maintain internal consistency. -- **Single `forwardLET` call**: Since `forwardLET` accepts an array of leaves, the divergent leaves and the re-added legitimate bridges should be combined into a single call when possible (e.g., `forwardLET([BX, B3, B4], ...)`), reducing the number of transactions. -- **Order of operations matters**: The `backwardLET` must always come before `forwardLET` when both are needed, since `backwardLET` requires the current tree state to compute valid Merkle proofs. After a `forwardLET`, the tree state has changed and any previously computed proofs for `backwardLET` would be invalid. 
- -## Appendix: API and gRPC reference - -### AggLayer gRPC — `NodeStateService` - -**Proto package**: `agglayer.node.v1` - -| RPC Method | Description | Key response fields | -|------------|-------------|---------------------| -| `GetNetworkInfo` | Current network state and settlement info | `settled_ler`, `settled_let_leaf_count`, `settled_height`, `settled_certificate_id`, `network_status` | -| `GetLatestCertificateHeader` | Latest certificate (settled or pending) | `prev_local_exit_root`, `new_local_exit_root`, `height`, `status`, `error` | -| `GetCertificateHeader` | Specific certificate by ID | Same as above | - -**`CertificateStatus` enum values**: `PENDING` (1), `PROVEN` (2), `CANDIDATE` (3), `IN_ERROR` (4), `SETTLED` (5) - -**`LatestCertificateRequestType` enum values**: `LATEST_CERTIFICATE_REQUEST_TYPE_SETTLED`, `LATEST_CERTIFICATE_REQUEST_TYPE_PENDING` - -### Bridge Service REST API - -**Base path**: `/bridge/v1` - -| Endpoint | Method | Key params | Description | -|----------|--------|------------|-------------| -| `/bridge-by-deposit-count` | GET | `network_id`, `deposit_count` | Get a single bridge by deposit count and network | -| `/bridges` | GET | `network_id`, `page_number`, `page_size` | Paginated list of bridges for a network | -| `/sync-status` | GET | — | Compare on-chain vs synced deposit counts | -| `/claim-proof` | GET | `network_id`, `leaf_index`, `deposit_count` | Merkle proofs for local and rollup exit roots | -| `/l1-info-tree-index` | GET | `network_id`, `deposit_count` | First L1 info tree index after a deposit count | - -### Smart contract view functions (`AgglayerBridgeL2`) - -| Function | Returns | Description | -|----------|---------|-------------| -| `depositCount()` | `uint256` | Current number of deposits in the LET | -| `getRoot()` | `bytes32` | Current Merkle root (LER) of the LET | -| `isEmergencyState()` | `bool` | Whether emergency mode is active | -| `networkID()` | `uint32` | Network ID of this L2 chain | -| 
`emergencyBridgePauser()` | `address` | Account that can activate emergency state | -| `emergencyBridgeUnpauser()` | `address` | Account that can deactivate emergency state | - -### Smart contract view functions (`AgglayerGERL2`) - -| Function | Returns | Description | -|----------|---------|-------------| -| `globalExitRootRemover()` | `address` | Account that can call `backwardLET`/`forwardLET` | -| `globalExitRootUpdater()` | `address` | Account that can insert global exit roots | +The tool will resume diagnosis using the override data, print the recovery plan, and +execute the same standard recovery flow. diff --git a/tools/backward_forward_let/README.md b/tools/backward_forward_let/README.md new file mode 100644 index 000000000..ed54a3567 --- /dev/null +++ b/tools/backward_forward_let/README.md @@ -0,0 +1,313 @@ +# `backward-forward-let` + +`backward-forward-let` diagnoses and recovers Local Exit Tree divergence between the +AggLayer's settled state and the current L2 bridge state. + +It also includes staging-only helper commands for certificate injection drills. + +## What the tool does + +The main command: + +- reads the settled AggLayer state, +- reads the current L2 bridge state, +- queries aggsender for settled certificate bridge exits, +- finds the divergence point, +- classifies the recovery case, +- prints the recovery plan, +- activates emergency state when needed, +- executes `BackwardLET` and/or `ForwardLET`, +- verifies the post-step deposit count and LER, +- deactivates emergency state before exit. 
+ +## Configuration + +The tool reads the same TOML config format as aggkit and requires the sections and keys below (`craft-cert` additionally reads `AggSender.AggsenderPrivateKey` as its default signer): + +- `Common.L2RPC.URL` +- `BridgeL2Sync.BridgeAddr` +- `AgglayerClient` +- `BackwardForwardLET.BridgeServiceURL` +- `BackwardForwardLET.AggsenderRPCURL` +- `BackwardForwardLET.L2NetworkID` +- `BackwardForwardLET.GERRemoverKey` +- `BackwardForwardLET.EmergencyPauserKey` +- `BackwardForwardLET.EmergencyUnpauserKey` + +Example: + +```toml +[Common.L2RPC] +URL = "http://localhost:8545" + +[BridgeL2Sync] +BridgeAddr = "0x1111111111111111111111111111111111111111" + +[AgglayerClient.GRPC] +URL = "http://localhost:4443" +MinConnectTimeout = "5s" +RequestTimeout = "300s" +UseTLS = false + +[BackwardForwardLET] +BridgeServiceURL = "http://localhost:8080/bridge/v1" +AggsenderRPCURL = "http://localhost:5576" +L2NetworkID = 1 + +[BackwardForwardLET.GERRemoverKey] +Method = "local" +Path = "/path/to/ger-remover.keystore" +Password = "secret" + +[BackwardForwardLET.EmergencyPauserKey] +Method = "local" +Path = "/path/to/emergency-pauser.keystore" +Password = "secret" + +[BackwardForwardLET.EmergencyUnpauserKey] +Method = "local" +Path = "/path/to/emergency-unpauser.keystore" +Password = "secret" +``` + +Role requirements: + +- `GERRemoverKey` must be able to call `backwardLET` and `forwardLET`. +- `EmergencyPauserKey` must be able to activate emergency state. +- `EmergencyUnpauserKey` must be able to deactivate emergency state.
+ +## Main recovery command + +Diagnose and, after confirmation, execute recovery: + +```bash +backward-forward-let --cfg aggkit-config.toml +``` + +Run non-interactively: + +```bash +backward-forward-let --cfg aggkit-config.toml --yes +``` + +Use a bridge-exit override file when aggsender cannot provide settled certificate exits: + +```bash +backward-forward-let --cfg aggkit-config.toml \ + --cert-exits-file certificate_exits_override.json +``` + +### Output behavior + +The command prints one of: + +- `NoDivergence` +- a classified recovery case with divergence details +- a missing-certificate report when certificate exits cannot be loaded + +Recovery behavior by case: + +- Case 1 and Case 3: `ForwardLET` only +- Case 2 and Case 4: `BackwardLET`, then `ForwardLET`, and a second `ForwardLET` when + extra real L2 bridges must be replayed + +## Fallback when aggsender data is unavailable + +If the tool reports missing certificate exits, fetch them from the AggLayer admin/debug +endpoint and rerun with `--cert-exits-file`. + +Detailed procedure: + +- [`RECOVERY_PROCEDURE.md`](./RECOVERY_PROCEDURE.md) + +That document covers: + +- enabling `debug-mode = true`, +- reaching the AggLayer admin JSON-RPC API, +- using `admin_getCertificate`, +- building the override file, +- handling heights whose cert ID is not auto-resolved. + +## Commands + +### `backward-forward-let` + +Diagnose and recover divergence. + +Flags: + +- `--cfg`, `-c`: one or more config files +- `--yes`: skip interactive confirmation +- `--cert-exits-file`, `-f`: fallback JSON file with bridge exits keyed by certificate height + +### `backward-forward-let send-cert` + +Send a certificate JSON to the AggLayer and optionally store it in the aggsender DB. + +This is primarily useful for controlled staging drills and test tooling. 
+ +Example: + +```bash +backward-forward-let send-cert \ + --cfg agglayer-only.toml \ + --cert-file /tmp/cert.json \ + --db-path /path/to/aggsender.sqlite +``` + +For fallback-mechanism drills where aggsender must not retain the certificate, send to +AggLayer only: + +```bash +backward-forward-let send-cert \ + --cfg agglayer-only.toml \ + --cert-file /tmp/cert.json \ + --no-db +``` + +Flags: + +- `--cfg`, `-c`: config file containing at least `AgglayerClient` +- `--cert-json`: certificate JSON string (mutually exclusive with `--cert-file`) +- `--cert-file`, `-f`: certificate JSON file (mutually exclusive with `--cert-json`) +- `--db-path`: aggsender SQLite DB path (not needed with `--no-db`) +- `--no-db`: skip aggsender DB storage entirely + +Behavior: + +- sends the certificate to the AggLayer, +- stores it in aggsender DB as the last sent certificate unless `--no-db` is set, +- derives `FromBlock` from the previous certificate when possible so aggsender retry logic remains coherent. + +### `backward-forward-let craft-cert` + +Build a signed malicious certificate JSON for staging drills. + +This command is intentionally gated: it refuses to run unless `--staging-only` is passed. + +Example (top-level flags such as `--cfg` go before the `craft-cert` subcommand, which does not declare them itself): + +```bash +backward-forward-let --cfg aggkit-config.toml \ + craft-cert \ + --staging-only \ + --num-fake-exits 1 \ + --out /tmp/malicious-cert.json +``` + +By default `craft-cert` reuses `AggSender.AggsenderPrivateKey` from the config, so the +same shared signer config used by aggsender can be used here as well, including GCP KMS +and other `go_signer` backends.
+ +To override the config for a one-off local keystore drill, pass the legacy CLI flags: + +```bash +backward-forward-let --cfg aggkit-config.toml \ + craft-cert \ + --signer-key-path /path/to/sequencer.keystore \ + --signer-key-password 'secret' \ + --staging-only \ + --num-fake-exits 1 \ + --out /tmp/malicious-cert.json +``` + +If aggkit/aggsender is stopped and aggsender RPC is unavailable, add `--db-path` so the +command can reconstruct prior settled bridge exits from the aggsender SQLite DB: + +```bash +backward-forward-let --cfg aggkit-config.toml \ + craft-cert \ + --signer-key-path /path/to/sequencer.keystore \ + --signer-key-password 'secret' \ + --db-path /path/to/aggsender.sqlite \ + --staging-only \ + --num-fake-exits 2 \ + --out /tmp/malicious-cert.json +``` + +If aggsender is intentionally stopped and neither aggsender RPC nor the local DB can +provide all historical bridge exits, reuse the same fallback override file used by the +main diagnosis command: + +```bash +backward-forward-let --cfg aggkit-config.toml \ + --cert-exits-file certificate_exits_override.json \ + craft-cert \ + --staging-only \ + --num-fake-exits 1 \ + --out /tmp/malicious-cert.json +``` + +Flags: + +- `--cfg`, `-c`: config file with normal tool connectivity settings (top-level flag; pass it before `craft-cert`) +- `AggSender.AggsenderPrivateKey` (config key, not a CLI flag): default signer config reused by `craft-cert` +- `--signer-key-path`: optional local-keystore override for the signer config +- `--signer-key-password`: password for the local-keystore override +- `--out`: write crafted JSON to a file instead of stdout +- `--db-path`: optional aggsender SQLite DB path when aggsender RPC is unavailable +- `--num-fake-exits`: number of fake exits to include +- `--starting-exit-index`: start index used to derive unique destination addresses +- `--nonce`: optional deterministic nonce used in fake destination derivation +- `--origin-network`: fake exit origin network +- `--origin-token-address`: fake exit origin token address +- `--destination-network`:
fake exit destination network +- `--amount`: decimal amount for each fake exit +- `--staging-only`: required acknowledgement + +Behavior: + +- reads the current settled state from AggLayer, +- reconstructs the existing leaf sequence from aggsender RPC, aggsender DB, fallback + override data, and bridge service as needed, +- builds one or more fake `BridgeExit`s, +- computes the resulting `NewLocalExitRoot`, +- signs the crafted certificate, +- writes JSON that can be consumed by `send-cert`. + +## Staging drill flow + +To simulate divergence on a staging network: + +1. Stop aggkit/aggsender before crafting or sending any malicious certificate. + This prevents a genuine pending certificate from taking the next height while the drill + is being prepared. +2. Confirm there is no unrelated non-error pending certificate already occupying the next + height. If there is, wait for it to settle before continuing. + Re-check this immediately before each `send-cert`, because staging can advance while + you are waiting on settlement or bridge indexing. +3. Craft a malicious certificate with `craft-cert`. +4. Submit it with `send-cert`. + Use `--no-db` if you specifically want to test the fallback path where aggsender cannot + provide certificate bridge exits and operators must use the AggLayer admin/debug endpoint. +5. Keep aggkit/aggsender stopped until every malicious certificate needed for the current + drill has been submitted to AggLayer. +6. Restart aggkit/aggsender and wait for the certificate to settle. + On staging this settlement can take up to one hour. +7. Optionally create extra real L2 bridges if you want a Case 2 or Case 4 drill. + Wait for bridge service to index them before expecting diagnosis or recovery to use + them. +8. Run `backward-forward-let --cfg ...` to diagnose and recover. +9. After recovery, rerun the main command until it reports `NoDivergence`. 
+ For Case 2 and Case 4, this may require restarting aggsender and waiting for the + honest follow-up certificate to settle after the genuine L2 bridges have been replayed. + +Typical case mapping: + +- Case 1: one malicious cert, no extra L2 bridges +- Case 2: one malicious cert, then extra real L2 bridges +- Case 3: two malicious certs, no extra L2 bridges +- Case 4: two malicious certs, then extra real L2 bridges + +Intermediate expectations: + +- After only the first malicious cert in a Case 3 drill has settled, the network still + looks like Case 1. +- Final Case 3 classification only appears after the second malicious cert settles. + +## Safety notes + +- `craft-cert` and `send-cert` are for staging drills and controlled test environments. +- Do not use these two staging helper commands (`craft-cert`, `send-cert`) against a production network. +- The recovery command itself is intended for real incidents, but only with the correct + signer roles and a verified operator workflow. diff --git a/tools/backward_forward_let/RECOVERY_PROCEDURE.md b/tools/backward_forward_let/RECOVERY_PROCEDURE.md index 9cd2549a8..8cd3976f7 100644 --- a/tools/backward_forward_let/RECOVERY_PROCEDURE.md +++ b/tools/backward_forward_let/RECOVERY_PROCEDURE.md @@ -14,6 +14,10 @@ from the agglayer node. `test/e2e/envs/op-pp/config/agglayer/config.toml`). - The agglayer admin JSON-RPC API must be reachable (default port 4446). The URL is exposed as `agglayer.services.admin_api.external` in `summary.json`. +- In some staging environments the admin API is protected by an identity-aware proxy (IAP) or another identity + layer rather than being directly reachable on `localhost:4446`. In that case, obtain + the required bearer token first and pass it in an `Authorization: Bearer` request header when calling + `admin_getCertificate`. - `curl` and `jq` must be installed on the operator's machine (`jq` is optional but makes the JSON manipulation much more convenient).
@@ -80,6 +84,17 @@ The response is a JSON-RPC result where `result` is a two-element array You need `result[0].bridge_exits` from each response. +If the admin API requires a bearer token, include it explicitly: + +```bash +JWT="..." +curl -s -X POST "$AGGLAYER_ADMIN" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $JWT" \ + -d "{\"jsonrpc\":\"2.0\",\"method\":\"admin_getCertificate\",\"params\":[\"$CERT_ID\"],\"id\":1}" \ + | jq '.' +``` + --- ## Step 3 — Build the JSON override file @@ -192,6 +207,15 @@ The tool will: 3. Print the full diagnosis (case classification, divergent leaves, extra L2 bridges). 4. Prompt for confirmation, then execute the recovery plan. +Operational notes: + +- When aggsender is intentionally stopped for a fallback drill, the missing-height report + may span the full settled history rather than only the newest malicious certificate. + This is expected. +- You can build the override file incrementally as more certificates settle. Reuse the + same file on later diagnosis reruns, and also pass it to `craft-cert` if a later + malicious certificate must be built while aggsender is still unavailable. 
+ --- ## Heights with UNKNOWN cert IDs diff --git a/tools/backward_forward_let/cmd/main.go b/tools/backward_forward_let/cmd/main.go index ea55020cf..52e6d3585 100644 --- a/tools/backward_forward_let/cmd/main.go +++ b/tools/backward_forward_let/cmd/main.go @@ -34,6 +34,67 @@ func main() { } app.Action = backward_forward_let.Run app.Commands = []*cli.Command{ + { + Name: "craft-cert", + Usage: "Build a signed malicious certificate JSON for staging drills", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "signer-key-path", + Usage: "Path to the keystore file used to sign the crafted certificate (overrides AggSender.AggsenderPrivateKey)", + }, + &cli.StringFlag{ + Name: "signer-key-password", + Usage: "Password for the keystore file used to sign the crafted certificate (used with --signer-key-path)", + }, + &cli.StringFlag{ + Name: "out", + Usage: "Write the crafted certificate JSON to this file instead of stdout", + }, + &cli.StringFlag{ + Name: "db-path", + Usage: "Optional path to the aggsender SQLite DB when aggsender RPC is unavailable", + }, + &cli.IntFlag{ + Name: "num-fake-exits", + Usage: "Number of fake bridge exits to include in the crafted certificate", + Value: 1, + }, + &cli.IntFlag{ + Name: "starting-exit-index", + Usage: "Starting index used to derive unique fake destination addresses", + Value: 0, + }, + &cli.StringFlag{ + Name: "nonce", + Usage: "Optional nonce used to derive deterministic fake destination addresses", + }, + &cli.UintFlag{ + Name: "origin-network", + Usage: "Origin network for fake bridge exits", + Value: 0, + }, + &cli.StringFlag{ + Name: "origin-token-address", + Usage: "Origin token address for fake bridge exits", + Value: "0x0000000000000000000000000000000000000000", + }, + &cli.UintFlag{ + Name: "destination-network", + Usage: "Destination network for fake bridge exits", + Value: 0, + }, + &cli.StringFlag{ + Name: "amount", + Usage: "Amount for each fake bridge exit, encoded as a decimal string", + Value: "0", + }, + 
&cli.BoolFlag{ + Name: "staging-only", + Usage: "Acknowledge that crafted malicious certificates are only for staging drills", + }, + }, + Action: backward_forward_let.RunCraftCert, + }, { Name: "send-cert", Usage: "Send a certificate to the agglayer and record it in the aggsender DB", @@ -48,9 +109,12 @@ func main() { Usage: "Path to a file containing the certificate JSON (mutually exclusive with --cert-json)", }, &cli.StringFlag{ - Name: "db-path", - Usage: "Path to the aggsender SQLite DB file (e.g. /path/to/aggsender.sqlite)", - Required: true, + Name: "db-path", + Usage: "Path to the aggsender SQLite DB file (e.g. /path/to/aggsender.sqlite)", + }, + &cli.BoolFlag{ + Name: "no-db", + Usage: "Send the certificate to the agglayer without storing it in the aggsender DB", }, &cli.StringFlag{ Name: "signer-key-path", diff --git a/tools/backward_forward_let/config.go b/tools/backward_forward_let/config.go index 685221f04..20a6b77a5 100644 --- a/tools/backward_forward_let/config.go +++ b/tools/backward_forward_let/config.go @@ -26,10 +26,19 @@ type Config struct { // AgglayerClient is the AggLayer gRPC client configuration. AgglayerClient agglayer.ClientConfig `mapstructure:"AgglayerClient"` + // AggSender contains the subset of aggsender config reused by craft-cert signer resolution. + AggSender CraftCertAggsenderConfig `mapstructure:"AggSender"` + // BackwardForwardLET contains tool-specific settings. BackwardForwardLET BackwardForwardLETConfig `mapstructure:"BackwardForwardLET"` } +// CraftCertAggsenderConfig contains the aggsender signer settings reused by craft-cert. +type CraftCertAggsenderConfig struct { + // AggsenderPrivateKey is the shared signer config used to sign certificates. + AggsenderPrivateKey signertypes.SignerConfig `mapstructure:"AggsenderPrivateKey"` +} + // BackwardForwardLETConfig contains configuration specific to the backward/forward LET tool. 
type BackwardForwardLETConfig struct { // GERRemoverKey is the signing key used for GER-removal and bridge admin operations. diff --git a/tools/backward_forward_let/craft_cert.go b/tools/backward_forward_let/craft_cert.go new file mode 100644 index 000000000..ed10f7fd4 --- /dev/null +++ b/tools/backward_forward_let/craft_cert.go @@ -0,0 +1,564 @@ +package backward_forward_let + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "math/big" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + agglayertypes "github.com/agglayer/aggkit/agglayer/types" + aggsenderdb "github.com/agglayer/aggkit/aggsender/db" + aggsendertypes "github.com/agglayer/aggkit/aggsender/types" + "github.com/agglayer/aggkit/aggsender/validator" + bridgetypes "github.com/agglayer/aggkit/bridgesync/types" + aggkitgrpc "github.com/agglayer/aggkit/grpc" + "github.com/agglayer/aggkit/log" + "github.com/agglayer/go_signer/signer" + signertypes "github.com/agglayer/go_signer/signer/types" + "github.com/ethereum/go-ethereum/accounts/abi/bind" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" + "github.com/urfave/cli/v2" + "google.golang.org/grpc/codes" +) + +const defaultL1InfoTreeLeafCount uint32 = 1 + +const ( + craftCertFetchMaxAttempts = 6 + craftCertFetchInitialBackoff = 500 * time.Millisecond + craftCertFetchMaxBackoff = 5 * time.Second + craftCertRPCRequestTimeout = 5 * time.Second + craftCertFileMode = 0o600 +) + +type certStoreReader interface { + GetCertificateByHeight(height uint64) (*aggsendertypes.Certificate, error) + GetCertificateHeaderByHeight(height uint64) (*aggsendertypes.CertificateHeader, error) +} + +type craftCertOptions struct { + numFakeExits int + startingExitIndex int + nonce []byte + originNetwork uint32 + originTokenAddr common.Address + destNetwork uint32 + amount *big.Int +} + +// RunCraftCert is the CLI action for the craft-cert subcommand. +// It builds a signed malicious certificate JSON for staging drills. 
+func RunCraftCert(c *cli.Context) error { + if !c.Bool("staging-only") { + return fmt.Errorf("craft-cert requires --staging-only acknowledgement") + } + + cfg, err := LoadConfig(c) + if err != nil { + return err + } + if f := c.String("cert-exits-file"); f != "" { + cfg.BackwardForwardLET.CertificateExitsFile = f + } + + opts, err := craftCertOptionsFromCLI(c) + if err != nil { + return err + } + + dialCtx, dialCancel := context.WithTimeout(c.Context, dialTimeout) + env, err := SetupEnv(dialCtx, cfg) + dialCancel() + if err != nil { + return err + } + defer env.Close() + + var certStore certStoreReader + if dbPath := c.String("db-path"); dbPath != "" { + certStore, err = openCraftCertStorage(dbPath) + if err != nil { + return err + } + } + + certSigner, err := loadCraftCertSigner(c.Context, env, cfg, c) + if err != nil { + return err + } + + cert, err := craftMaliciousCertificate(c.Context, env, certStore, certSigner, opts) + if err != nil { + return err + } + + data, err := json.MarshalIndent(cert, "", " ") + if err != nil { + return fmt.Errorf("marshal crafted certificate: %w", err) + } + data = append(data, '\n') + + outPath := c.String("out") + if outPath == "" { + _, err = os.Stdout.Write(data) + return err + } + + if err := os.WriteFile(filepath.Clean(outPath), data, craftCertFileMode); err != nil { + return fmt.Errorf("write crafted certificate to %s: %w", outPath, err) + } + fmt.Printf("Crafted certificate written to %s\n", outPath) + return nil +} + +func craftCertOptionsFromCLI(c *cli.Context) (*craftCertOptions, error) { + numFakeExits := c.Int("num-fake-exits") + if numFakeExits <= 0 { + return nil, fmt.Errorf("--num-fake-exits must be greater than 0") + } + + amount, ok := new(big.Int).SetString(c.String("amount"), decimalBase) + if !ok { + return nil, fmt.Errorf("parse --amount %q as decimal big.Int", c.String("amount")) + } + + originTokenAddr := common.HexToAddress(c.String("origin-token-address")) + + nonce := c.String("nonce") + if nonce == "" { 
+ nonce = strconv.FormatInt(time.Now().UnixNano(), decimalBase) + } + + return &craftCertOptions{ + numFakeExits: numFakeExits, + startingExitIndex: c.Int("starting-exit-index"), + nonce: []byte(nonce), + originNetwork: uint32(c.Uint("origin-network")), + originTokenAddr: originTokenAddr, + destNetwork: uint32(c.Uint("destination-network")), + amount: amount, + }, nil +} + +func loadCraftCertSigner( + ctx context.Context, + env *Env, + cfg *Config, + c *cli.Context, +) (signertypes.Signer, error) { + signerCfg, err := resolveCraftCertSignerConfig(cfg, c) + if err != nil { + return nil, err + } + + l2ChainID, err := env.chainIDFn(ctx) + if err != nil { + return nil, fmt.Errorf("get L2 chain ID for craft-cert signer: %w", err) + } + + signingKey, err := signer.NewSigner(ctx, l2ChainID.Uint64(), signerCfg, "craft-cert", log.GetDefaultLogger()) + if err != nil { + return nil, fmt.Errorf("load craft-cert signer: %w", err) + } + + if err := signingKey.Initialize(ctx); err != nil { + return nil, fmt.Errorf("initialize craft-cert signer: %w", err) + } + + return signingKey, nil +} + +func resolveCraftCertSignerConfig(cfg *Config, c *cli.Context) (signertypes.SignerConfig, error) { + if c.String("signer-key-path") != "" { + return signer.NewLocalSignerConfig(c.String("signer-key-path"), c.String("signer-key-password")), nil + } + + if cfg == nil { + return signertypes.SignerConfig{}, fmt.Errorf("craft-cert signer config is required") + } + + if cfg.AggSender.AggsenderPrivateKey.Method == "" { + return signertypes.SignerConfig{}, fmt.Errorf( + "craft-cert signer is not configured; set AggSender.AggsenderPrivateKey in config or pass --signer-key-path") + } + + return cfg.AggSender.AggsenderPrivateKey, nil +} + +func openCraftCertStorage(dbPath string) (certStoreReader, error) { + if dbPath == "" { + return nil, nil + } + storage, err := aggsenderdb.NewAggSenderSQLStorage(log.GetDefaultLogger(), aggsenderdb.AggSenderSQLStorageConfig{ + DBPath: dbPath, + CertificatesDir: 
filepath.Join(filepath.Dir(dbPath), "certificates"), + }) + if err != nil { + return nil, fmt.Errorf("open aggsender DB at %s: %w", dbPath, err) + } + return storage, nil +} + +func craftMaliciousCertificate( + ctx context.Context, + env *Env, + certStore certStoreReader, + certSigner signertypes.HashSigner, + opts *craftCertOptions, +) (*agglayertypes.Certificate, error) { + if opts == nil { + return nil, fmt.Errorf("craft certificate options are required") + } + if certSigner == nil { + return nil, fmt.Errorf("craft certificate signer is required") + } + + fakeBridgeExits := make([]*agglayertypes.BridgeExit, 0, opts.numFakeExits) + for i := 0; i < opts.numFakeExits; i++ { + fakeBridgeExits = append(fakeBridgeExits, makeFakeBridgeExit(opts, opts.startingExitIndex+i)) + } + + certHeight, prevLER, existingLeafCount, l1InfoTreeLeafCount, err := currentCertBaseState(ctx, env, certStore) + if err != nil { + return nil, err + } + + existingHashes, err := loadExistingLeafHashes(ctx, env, certStore, certHeight, prevLER, existingLeafCount) + if err != nil { + return nil, err + } + + newHashes := make([]common.Hash, 0, len(fakeBridgeExits)) + for _, be := range fakeBridgeExits { + newHashes = append(newHashes, BridgeExitLeafHash(be)) + } + + newLER, err := ComputeLERForNewLeaves(existingHashes, newHashes) + if err != nil { + return nil, fmt.Errorf("compute new local exit root: %w", err) + } + + cert := &agglayertypes.Certificate{ + NetworkID: env.L2NetworkID, + Height: certHeight, + PrevLocalExitRoot: prevLER, + NewLocalExitRoot: newLER, + BridgeExits: fakeBridgeExits, + L1InfoTreeLeafCount: l1InfoTreeLeafCount, + } + + hashToSign, err := validator.HashCertificateToSign(cert) + if err != nil { + return nil, fmt.Errorf("hash crafted certificate to sign: %w", err) + } + sig, err := certSigner.SignHash(ctx, hashToSign) + if err != nil { + return nil, fmt.Errorf("sign crafted certificate: %w", err) + } + + cert.AggchainData = &agglayertypes.AggchainDataMultisig{ + Multisig: 
&agglayertypes.Multisig{ + Signatures: []agglayertypes.ECDSAMultisigEntry{ + {Index: 0, Signature: sig}, + }, + }, + } + + return cert, nil +} + +func currentCertBaseState( + ctx context.Context, + env *Env, + certStore certStoreReader, +) (uint64, common.Hash, uint32, uint32, error) { + info, err := env.AgglayerClient.GetNetworkInfo(ctx, env.L2NetworkID) + if err != nil { + var grpcErr aggkitgrpc.GRPCError + if !errors.As(err, &grpcErr) || grpcErr.Code != codes.NotFound { + return 0, common.Hash{}, 0, 0, fmt.Errorf("get network info from agglayer: %w", err) + } + } + if err == nil && info.SettledHeight != nil { + if info.SettledLER == nil || info.SettledLETLeafCount == nil { + return 0, common.Hash{}, 0, 0, fmt.Errorf("agglayer returned incomplete settled state") + } + certHeight := *info.SettledHeight + 1 + existingLeafCount := uint32(*info.SettledLETLeafCount) + l1InfoTreeLeafCount := defaultL1InfoTreeLeafCount + + switch { + case certStore != nil: + header, headerErr := certStore.GetCertificateHeaderByHeight(*info.SettledHeight) + if headerErr == nil && header != nil && header.L1InfoTreeLeafCount > 0 { + l1InfoTreeLeafCount = header.L1InfoTreeLeafCount + } + default: + storedCert, certErr := env.AggsenderRPC.GetCertificateHeaderPerHeight(info.SettledHeight) + if certErr == nil && storedCert != nil && storedCert.Header != nil && storedCert.Header.L1InfoTreeLeafCount > 0 { + l1InfoTreeLeafCount = storedCert.Header.L1InfoTreeLeafCount + } + } + + return certHeight, *info.SettledLER, existingLeafCount, l1InfoTreeLeafCount, nil + } + + callOpts := &bind.CallOpts{Context: ctx} + root, rootErr := env.L2Bridge.GetRoot(callOpts) + if rootErr != nil { + return 0, common.Hash{}, 0, 0, fmt.Errorf("get L2 root for initial certificate: %w", rootErr) + } + + dcBig, dcErr := env.L2Bridge.DepositCount(callOpts) + if dcErr != nil { + return 0, common.Hash{}, 0, 0, fmt.Errorf("get L2 deposit count for initial certificate: %w", dcErr) + } + + return 0, common.Hash(root), 
uint32(dcBig.Uint64()), defaultL1InfoTreeLeafCount, nil +} + +func loadExistingLeafHashes( + ctx context.Context, + env *Env, + certStore certStoreReader, + certHeight uint64, + settledLER common.Hash, + existingLeafCount uint32, +) ([]common.Hash, error) { + if certHeight == 0 { + return loadLeafHashesFromBridgeService(ctx, env, existingLeafCount) + } + + bridgeMatchesSettled, err := currentBridgeMatchesSettled(ctx, env, settledLER, existingLeafCount) + if err != nil { + return nil, err + } + if bridgeMatchesSettled { + return loadLeafHashesFromBridgeService(ctx, env, existingLeafCount) + } + + settledHeight := certHeight - 1 + hashes := make([]common.Hash, 0, existingLeafCount) + prefixMissing := true + for h := uint64(0); h <= settledHeight; h++ { + exits, err := getStoredBridgeExitsForHeight(env, certStore, h) + if err != nil { + if !prefixMissing { + return nil, fmt.Errorf("load certificate bridge exits at height %d after later heights already loaded: %w", h, err) + } + continue + } + prefixMissing = false + for _, be := range exits { + hashes = append(hashes, BridgeExitLeafHash(be)) + } + } + + if uint32(len(hashes)) > existingLeafCount { + return nil, fmt.Errorf( + "loaded %d historical leaf hashes, exceeds expected settled leaf count %d", + len(hashes), existingLeafCount, + ) + } + + missingPrefixLeafCount := existingLeafCount - uint32(len(hashes)) + if missingPrefixLeafCount > 0 { + prefixHashes, err := loadLeafHashesFromBridgeService(ctx, env, missingPrefixLeafCount) + if err != nil { + return nil, fmt.Errorf("reconstruct missing certificate prefix from bridge service: %w", err) + } + hashes = append(prefixHashes, hashes...) 
+ } + + if uint32(len(hashes)) != existingLeafCount { + return nil, fmt.Errorf("reconstructed %d total leaf hashes, expected %d", len(hashes), existingLeafCount) + } + + return hashes, nil +} + +func currentBridgeMatchesSettled( + ctx context.Context, + env *Env, + settledLER common.Hash, + existingLeafCount uint32, +) (bool, error) { + if env == nil || env.L2Bridge == nil { + return false, nil + } + + callOpts := &bind.CallOpts{Context: ctx} + root, err := env.L2Bridge.GetRoot(callOpts) + if err != nil { + return false, fmt.Errorf("get L2 root for settled-state comparison: %w", err) + } + + dcBig, err := env.L2Bridge.DepositCount(callOpts) + if err != nil { + return false, fmt.Errorf("get L2 deposit count for settled-state comparison: %w", err) + } + + return common.Hash(root) == settledLER && uint32(dcBig.Uint64()) == existingLeafCount, nil +} + +func loadLeafHashesFromBridgeService(ctx context.Context, env *Env, existingLeafCount uint32) ([]common.Hash, error) { + hashes := make([]common.Hash, 0, existingLeafCount) + for dc := uint32(0); dc < existingLeafCount; dc++ { + br, err := env.BridgeService.GetBridgeByDepositCount(ctx, env.L2NetworkID, dc) + if err != nil { + return nil, fmt.Errorf("get bridge service leaf at deposit count %d: %w", dc, err) + } + hashes = append(hashes, BridgeResponseLeafHash(br)) + } + return hashes, nil +} + +func getStoredBridgeExitsForHeight( + env *Env, + certStore certStoreReader, + height uint64, +) ([]*agglayertypes.BridgeExit, error) { + if env != nil && env.BridgeExitsOverride != nil { + if exits, ok := env.BridgeExitsOverride.GetExits(height); ok { + return exits, nil + } + } + + if certStore != nil { + cert, err := certStore.GetCertificateByHeight(height) + if err != nil { + return nil, err + } + if cert == nil { + return nil, fmt.Errorf("certificate not found") + } + if cert.Header != nil && cert.Header.CertSource == aggsendertypes.CertificateSourceAggLayer { + return nil, fmt.Errorf("certificate at height %d has agglayer 
source and no local bridge exits", height) + } + if cert.SignedCertificate == nil { + return nil, fmt.Errorf("certificate at height %d has no signed certificate payload", height) + } + return parseBridgeExitsFromSignedCertificate(height, *cert.SignedCertificate) + } + + var lastErr error + backoff := craftCertFetchInitialBackoff + for attempt := 1; attempt <= craftCertFetchMaxAttempts; attempt++ { + cert, headerErr := callCraftCertRPCWithTimeout( + func() (*aggsendertypes.Certificate, error) { + return env.AggsenderRPC.GetCertificateHeaderPerHeight(&height) + }, + ) + if headerErr == nil && cert != nil && cert.SignedCertificate != nil { + exits, parseErr := parseBridgeExitsFromSignedCertificate(height, *cert.SignedCertificate) + if parseErr == nil { + return exits, nil + } + } else if headerErr != nil { + lastErr = headerErr + if isRetryableCraftCertFetchError(headerErr) { + if attempt == craftCertFetchMaxAttempts { + break + } + time.Sleep(backoff) + if backoff < craftCertFetchMaxBackoff { + backoff *= 2 + if backoff > craftCertFetchMaxBackoff { + backoff = craftCertFetchMaxBackoff + } + } + continue + } + } + + exits, err := callCraftCertRPCWithTimeout( + func() ([]*agglayertypes.BridgeExit, error) { + return env.AggsenderRPC.GetCertificateBridgeExits(&height) + }, + ) + if err == nil { + return exits, nil + } + lastErr = err + + if !isRetryableCraftCertFetchError(err) && !isRetryableCraftCertFetchError(lastErr) { + return nil, lastErr + } + + if attempt == craftCertFetchMaxAttempts { + break + } + time.Sleep(backoff) + if backoff < craftCertFetchMaxBackoff { + backoff *= 2 + if backoff > craftCertFetchMaxBackoff { + backoff = craftCertFetchMaxBackoff + } + } + } + + return nil, lastErr +} + +func callCraftCertRPCWithTimeout[T any](fn func() (T, error)) (T, error) { + type result struct { + value T + err error + } + + resultCh := make(chan result, 1) + go func() { + value, err := fn() + resultCh <- result{value: value, err: err} + }() + + select { + case result 
:= <-resultCh: + return result.value, result.err + case <-time.After(craftCertRPCRequestTimeout): + var zero T + return zero, fmt.Errorf("aggsender RPC request timed out after %s", craftCertRPCRequestTimeout) + } +} + +func parseBridgeExitsFromSignedCertificate(height uint64, signedCert string) ([]*agglayertypes.BridgeExit, error) { + var agglayerCert agglayertypes.Certificate + if err := json.Unmarshal([]byte(signedCert), &agglayerCert); err != nil { + return nil, fmt.Errorf("unmarshal signed certificate at height %d: %w", height, err) + } + return agglayerCert.BridgeExits, nil +} + +func isRetryableCraftCertFetchError(err error) bool { + if err == nil { + return false + } + msg := strings.ToLower(err.Error()) + return strings.Contains(msg, "found: 429") || + strings.Contains(msg, "too many requests") || + strings.Contains(msg, "connect: connection refused") || + strings.Contains(msg, "no route to host") || + strings.Contains(msg, "timeout") +} + +func makeFakeBridgeExit(opts *craftCertOptions, exitIndex int) *agglayertypes.BridgeExit { + addrBytes := crypto.Keccak256(append(append([]byte(nil), opts.nonce...), byte(exitIndex))) + return &agglayertypes.BridgeExit{ + LeafType: bridgetypes.LeafTypeAsset, + TokenInfo: &agglayertypes.TokenInfo{ + OriginNetwork: opts.originNetwork, + OriginTokenAddress: opts.originTokenAddr, + }, + DestinationNetwork: opts.destNetwork, + DestinationAddress: common.BytesToAddress(addrBytes), + Amount: new(big.Int).Set(opts.amount), + Metadata: nil, + } +} diff --git a/tools/backward_forward_let/craft_cert_test.go b/tools/backward_forward_let/craft_cert_test.go new file mode 100644 index 000000000..757f72248 --- /dev/null +++ b/tools/backward_forward_let/craft_cert_test.go @@ -0,0 +1,614 @@ +package backward_forward_let + +import ( + "context" + "crypto/ecdsa" + "encoding/json" + "errors" + "flag" + "math/big" + "testing" + "time" + + agglayertypes "github.com/agglayer/aggkit/agglayer/types" + aggsendertypes 
"github.com/agglayer/aggkit/aggsender/types" + bridgeservicetypes "github.com/agglayer/aggkit/bridgeservice/types" + bridgetypes "github.com/agglayer/aggkit/bridgesync/types" + "github.com/agglayer/go_signer/signer" + signertypes "github.com/agglayer/go_signer/signer/types" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" + "github.com/stretchr/testify/require" + "github.com/urfave/cli/v2" +) + +type stubAgglayerClient struct { + info agglayertypes.NetworkInfo + err error +} + +func (s *stubAgglayerClient) SendCertificate(context.Context, *agglayertypes.Certificate) (common.Hash, error) { + return common.Hash{}, errors.New("not implemented") +} + +func (s *stubAgglayerClient) GetCertificateHeader(context.Context, common.Hash) (*agglayertypes.CertificateHeader, error) { + return nil, errors.New("not implemented") +} + +func (s *stubAgglayerClient) GetNetworkInfo(context.Context, uint32) (agglayertypes.NetworkInfo, error) { + return s.info, s.err +} + +func (s *stubAgglayerClient) GetEpochConfiguration(context.Context) (*agglayertypes.ClockConfiguration, error) { + return nil, errors.New("not implemented") +} + +func (s *stubAgglayerClient) GetLatestSettledCertificateHeader(context.Context, uint32) (*agglayertypes.CertificateHeader, error) { + return nil, errors.New("not implemented") +} + +func (s *stubAgglayerClient) GetLatestPendingCertificateHeader(context.Context, uint32) (*agglayertypes.CertificateHeader, error) { + return nil, errors.New("not implemented") +} + +func (s *stubAgglayerClient) GetCertificateHeaderByID(context.Context, common.Hash) (*agglayertypes.CertificateHeader, error) { + return nil, errors.New("not implemented") +} + +func (s *stubAgglayerClient) GetCertificateHeaderByHash(context.Context, common.Hash) (*agglayertypes.CertificateHeader, error) { + return nil, errors.New("not implemented") +} + +func (s *stubAgglayerClient) GetCertificateHeaderByCertificateID(context.Context, common.Hash) 
(*agglayertypes.CertificateHeader, error) { + return nil, errors.New("not implemented") +} + +func (s *stubAgglayerClient) GetCertificateHeaderPerHeight(context.Context, uint32, uint64) (*agglayertypes.CertificateHeader, error) { + return nil, errors.New("not implemented") +} + +func (s *stubAgglayerClient) GetCertificateHeaderLegacy(context.Context, common.Hash) (*agglayertypes.CertificateHeader, error) { + return nil, errors.New("not implemented") +} + +func TestMakeFakeBridgeExit(t *testing.T) { + t.Parallel() + + opts := &craftCertOptions{ + nonce: []byte("nonce"), + originNetwork: 7, + originTokenAddr: common.HexToAddress("0x1111111111111111111111111111111111111111"), + destNetwork: 9, + amount: big.NewInt(123), + } + + exit0 := makeFakeBridgeExit(opts, 0) + exit1 := makeFakeBridgeExit(opts, 1) + + require.Equal(t, bridgetypes.LeafTypeAsset, exit0.LeafType) + require.Equal(t, uint32(7), exit0.TokenInfo.OriginNetwork) + require.Equal(t, common.HexToAddress("0x1111111111111111111111111111111111111111"), exit0.TokenInfo.OriginTokenAddress) + require.Equal(t, uint32(9), exit0.DestinationNetwork) + require.Equal(t, big.NewInt(123), exit0.Amount) + require.NotEqual(t, exit0.DestinationAddress, exit1.DestinationAddress) +} + +func TestCraftMaliciousCertificate_NoSettledCerts(t *testing.T) { + t.Parallel() + + signerKey, err := crypto.GenerateKey() + require.NoError(t, err) + + bridge := &stubL2Bridge{ + depositCount: big.NewInt(1), + root: [32]byte(common.HexToHash("0x1111111111111111111111111111111111111111111111111111111111111111")), + } + bridgeSvc := &stubBridgeService{ + bridges: map[uint32]*bridgeservicetypes.BridgeResponse{ + 0: { + LeafType: bridgetypes.LeafTypeAsset.Uint8(), + OriginNetwork: 0, + OriginAddress: bridgeservicetypes.Address("0x0000000000000000000000000000000000000000"), + DestinationNetwork: 1, + DestinationAddress: bridgeservicetypes.Address("0x2222222222222222222222222222222222222222"), + Amount: bridgeservicetypes.BigIntString("5"), + }, + 
}, + } + env := &Env{ + L2Bridge: bridge, + BridgeService: bridgeSvc, + AgglayerClient: &stubAgglayerClient{}, + L2NetworkID: 1, + } + + opts := &craftCertOptions{ + numFakeExits: 1, + startingExitIndex: 0, + nonce: []byte("run-a"), + originNetwork: 0, + originTokenAddr: common.Address{}, + destNetwork: 0, + amount: big.NewInt(0), + } + + cert, err := craftMaliciousCertificate(context.Background(), env, nil, &stubHashSigner{key: signerKey}, opts) + require.NoError(t, err) + require.Equal(t, uint64(0), cert.Height) + require.Equal(t, common.Hash(bridge.root), cert.PrevLocalExitRoot) + require.Len(t, cert.BridgeExits, 1) + require.Equal(t, uint32(1), cert.L1InfoTreeLeafCount) + + expectedLER, err := ComputeLERForNewLeaves( + []common.Hash{BridgeResponseLeafHash(bridgeSvc.bridges[0])}, + []common.Hash{BridgeExitLeafHash(cert.BridgeExits[0])}, + ) + require.NoError(t, err) + require.Equal(t, expectedLER, cert.NewLocalExitRoot) + + multisig, ok := cert.AggchainData.(*agglayertypes.AggchainDataMultisig) + require.True(t, ok) + require.Len(t, multisig.Multisig.Signatures, 1) +} + +func TestCraftMaliciousCertificate_SettledCertsFromAggsenderRPC(t *testing.T) { + t.Parallel() + + signerKey, err := crypto.GenerateKey() + require.NoError(t, err) + + settledHeight := uint64(1) + settledLER := common.HexToHash("0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") + settledLeafCount := uint64(2) + info := agglayertypes.NetworkInfo{ + SettledHeight: &settledHeight, + SettledLER: &settledLER, + SettledLETLeafCount: &settledLeafCount, + } + + exit0 := makeFakeBridgeExit(&craftCertOptions{ + nonce: []byte("existing-0"), + originNetwork: 0, + originTokenAddr: common.Address{}, + destNetwork: 0, + amount: big.NewInt(0), + }, 0) + exit1 := makeFakeBridgeExit(&craftCertOptions{ + nonce: []byte("existing-1"), + originNetwork: 0, + originTokenAddr: common.Address{}, + destNetwork: 0, + amount: big.NewInt(0), + }, 0) + + rpc := &stubAggsenderRPC{ + exitsByHeight: 
map[uint64][]*agglayertypes.BridgeExit{ + 0: {exit0}, + 1: {exit1}, + }, + failHeights: map[uint64]bool{}, + } + rpcHeader := &aggsendertypes.Certificate{ + Header: &aggsendertypes.CertificateHeader{L1InfoTreeLeafCount: 7}, + } + rpcWithHeader := &stubCraftAggsenderRPC{stubAggsenderRPC: rpc, certByHeight: map[uint64]*aggsendertypes.Certificate{1: rpcHeader}} + + env := &Env{ + AgglayerClient: &stubAgglayerClient{info: info}, + AggsenderRPC: rpcWithHeader, + L2NetworkID: 1, + } + + opts := &craftCertOptions{ + numFakeExits: 1, + startingExitIndex: 5, + nonce: []byte("new"), + originNetwork: 0, + originTokenAddr: common.Address{}, + destNetwork: 0, + amount: big.NewInt(0), + } + + cert, err := craftMaliciousCertificate(context.Background(), env, nil, &stubHashSigner{key: signerKey}, opts) + require.NoError(t, err) + require.Equal(t, uint64(2), cert.Height) + require.Equal(t, settledLER, cert.PrevLocalExitRoot) + require.Equal(t, uint32(7), cert.L1InfoTreeLeafCount) + + expectedLER, err := ComputeLERForNewLeaves( + []common.Hash{BridgeExitLeafHash(exit0), BridgeExitLeafHash(exit1)}, + []common.Hash{BridgeExitLeafHash(cert.BridgeExits[0])}, + ) + require.NoError(t, err) + require.Equal(t, expectedLER, cert.NewLocalExitRoot) +} + +func TestLoadExistingLeafHashes_ReconstructsMissingPrefixFromBridgeService(t *testing.T) { + t.Parallel() + + exit2 := makeFakeBridgeExit(&craftCertOptions{ + nonce: []byte("existing-2"), + originNetwork: 0, + originTokenAddr: common.Address{}, + destNetwork: 0, + amount: big.NewInt(0), + }, 0) + exit3 := makeFakeBridgeExit(&craftCertOptions{ + nonce: []byte("existing-3"), + originNetwork: 0, + originTokenAddr: common.Address{}, + destNetwork: 0, + amount: big.NewInt(0), + }, 0) + + bridge0 := &bridgeservicetypes.BridgeResponse{ + LeafType: bridgetypes.LeafTypeAsset.Uint8(), + OriginNetwork: 0, + OriginAddress: bridgeservicetypes.Address("0x0000000000000000000000000000000000000000"), + DestinationNetwork: 0, + DestinationAddress: 
bridgeservicetypes.Address("0x1111111111111111111111111111111111111111"), + Amount: bridgeservicetypes.BigIntString("1"), + } + bridge1 := &bridgeservicetypes.BridgeResponse{ + LeafType: bridgetypes.LeafTypeAsset.Uint8(), + OriginNetwork: 0, + OriginAddress: bridgeservicetypes.Address("0x0000000000000000000000000000000000000000"), + DestinationNetwork: 0, + DestinationAddress: bridgeservicetypes.Address("0x2222222222222222222222222222222222222222"), + Amount: bridgeservicetypes.BigIntString("2"), + } + + env := &Env{ + L2NetworkID: 1, + BridgeService: &stubBridgeService{ + bridges: map[uint32]*bridgeservicetypes.BridgeResponse{ + 0: bridge0, + 1: bridge1, + }, + }, + AggsenderRPC: &stubAggsenderRPC{ + exitsByHeight: map[uint64][]*agglayertypes.BridgeExit{ + 2: {exit2}, + 3: {exit3}, + }, + failHeights: map[uint64]bool{ + 0: true, + 1: true, + }, + }, + } + + hashes, err := loadExistingLeafHashes(context.Background(), env, nil, 4, common.Hash{}, 4) + require.NoError(t, err) + require.Equal(t, []common.Hash{ + BridgeResponseLeafHash(bridge0), + BridgeResponseLeafHash(bridge1), + BridgeExitLeafHash(exit2), + BridgeExitLeafHash(exit3), + }, hashes) +} + +func TestLoadExistingLeafHashes_AllHistoricalHeightsMissingFallsBackToBridgeService(t *testing.T) { + t.Parallel() + + bridge0 := &bridgeservicetypes.BridgeResponse{ + LeafType: bridgetypes.LeafTypeAsset.Uint8(), + OriginNetwork: 0, + OriginAddress: bridgeservicetypes.Address("0x0000000000000000000000000000000000000000"), + DestinationNetwork: 0, + DestinationAddress: bridgeservicetypes.Address("0x3333333333333333333333333333333333333333"), + Amount: bridgeservicetypes.BigIntString("3"), + } + bridge1 := &bridgeservicetypes.BridgeResponse{ + LeafType: bridgetypes.LeafTypeAsset.Uint8(), + OriginNetwork: 0, + OriginAddress: bridgeservicetypes.Address("0x0000000000000000000000000000000000000000"), + DestinationNetwork: 0, + DestinationAddress: bridgeservicetypes.Address("0x4444444444444444444444444444444444444444"), + 
Amount: bridgeservicetypes.BigIntString("4"), + } + + env := &Env{ + L2NetworkID: 1, + BridgeService: &stubBridgeService{ + bridges: map[uint32]*bridgeservicetypes.BridgeResponse{ + 0: bridge0, + 1: bridge1, + }, + }, + AggsenderRPC: &stubAggsenderRPC{ + failHeights: map[uint64]bool{ + 0: true, + 1: true, + }, + }, + } + + hashes, err := loadExistingLeafHashes(context.Background(), env, nil, 2, common.Hash{}, 2) + require.NoError(t, err) + require.Equal(t, []common.Hash{ + BridgeResponseLeafHash(bridge0), + BridgeResponseLeafHash(bridge1), + }, hashes) +} + +func TestLoadExistingLeafHashes_UsesBridgeServiceWhenCurrentBridgeMatchesSettled(t *testing.T) { + t.Parallel() + + bridge0 := &bridgeservicetypes.BridgeResponse{ + LeafType: bridgetypes.LeafTypeAsset.Uint8(), + OriginNetwork: 0, + OriginAddress: bridgeservicetypes.Address("0x0000000000000000000000000000000000000000"), + DestinationNetwork: 0, + DestinationAddress: bridgeservicetypes.Address("0x5555555555555555555555555555555555555555"), + Amount: bridgeservicetypes.BigIntString("5"), + } + bridge1 := &bridgeservicetypes.BridgeResponse{ + LeafType: bridgetypes.LeafTypeAsset.Uint8(), + OriginNetwork: 0, + OriginAddress: bridgeservicetypes.Address("0x0000000000000000000000000000000000000000"), + DestinationNetwork: 0, + DestinationAddress: bridgeservicetypes.Address("0x6666666666666666666666666666666666666666"), + Amount: bridgeservicetypes.BigIntString("6"), + } + + settledLER, err := ComputeLERForNewLeaves( + []common.Hash{BridgeResponseLeafHash(bridge0)}, + []common.Hash{BridgeResponseLeafHash(bridge1)}, + ) + require.NoError(t, err) + + env := &Env{ + L2NetworkID: 1, + L2Bridge: &stubL2Bridge{ + depositCount: big.NewInt(2), + root: [32]byte(settledLER), + }, + BridgeService: &stubBridgeService{ + bridges: map[uint32]*bridgeservicetypes.BridgeResponse{ + 0: bridge0, + 1: bridge1, + }, + }, + AggsenderRPC: &stubAggsenderRPC{ + failHeights: map[uint64]bool{ + 0: true, + 1: true, + }, + }, + } + + hashes, err := 
loadExistingLeafHashes(context.Background(), env, nil, 2, settledLER, 2) + require.NoError(t, err) + require.Equal(t, []common.Hash{ + BridgeResponseLeafHash(bridge0), + BridgeResponseLeafHash(bridge1), + }, hashes) +} + +func TestGetStoredBridgeExitsForHeight_FromDB(t *testing.T) { + t.Parallel() + + payload := &agglayertypes.Certificate{ + BridgeExits: []*agglayertypes.BridgeExit{ + makeFakeBridgeExit(&craftCertOptions{ + nonce: []byte("db"), + originNetwork: 0, + originTokenAddr: common.Address{}, + destNetwork: 0, + amount: big.NewInt(0), + }, 0), + }, + } + raw, err := json.Marshal(payload) + require.NoError(t, err) + + store := &stubCraftCertStore{ + certs: map[uint64]*aggsendertypes.Certificate{ + 0: {SignedCertificate: ptrString(string(raw)), Header: &aggsendertypes.CertificateHeader{}}, + }, + } + + exits, err := getStoredBridgeExitsForHeight(&Env{}, store, 0) + require.NoError(t, err) + require.Len(t, exits, 1) +} + +func TestGetStoredBridgeExitsForHeight_FromOverride(t *testing.T) { + t.Parallel() + + overrideExit := makeFakeBridgeExit(&craftCertOptions{ + nonce: []byte("override"), + originNetwork: 0, + originTokenAddr: common.Address{}, + destNetwork: 0, + amount: big.NewInt(0), + }, 0) + + exits, err := getStoredBridgeExitsForHeight(&Env{ + BridgeExitsOverride: &BridgeExitsOverride{ + parsed: map[uint64][]*agglayertypes.BridgeExit{ + 7: {overrideExit}, + }, + }, + }, nil, 7) + require.NoError(t, err) + require.Equal(t, []*agglayertypes.BridgeExit{overrideExit}, exits) +} + +func TestGetStoredBridgeExitsForHeight_FromAggsenderHeaderFallback(t *testing.T) { + t.Parallel() + + payload := &agglayertypes.Certificate{ + BridgeExits: []*agglayertypes.BridgeExit{ + makeFakeBridgeExit(&craftCertOptions{ + nonce: []byte("rpc-header"), + originNetwork: 0, + originTokenAddr: common.Address{}, + destNetwork: 0, + amount: big.NewInt(0), + }, 0), + }, + } + raw, err := json.Marshal(payload) + require.NoError(t, err) + + rpc := &stubCraftAggsenderRPC{ + 
stubAggsenderRPC: &stubAggsenderRPC{ + failHeights: map[uint64]bool{0: true}, + }, + certByHeight: map[uint64]*aggsendertypes.Certificate{ + 0: {SignedCertificate: ptrString(string(raw))}, + }, + } + + exits, err := getStoredBridgeExitsForHeight(&Env{AggsenderRPC: rpc}, nil, 0) + require.NoError(t, err) + require.Len(t, exits, 1) +} + +func TestGetStoredBridgeExitsForHeight_Retries429OnHeaderPath(t *testing.T) { + t.Parallel() + + payload := &agglayertypes.Certificate{ + BridgeExits: []*agglayertypes.BridgeExit{ + makeFakeBridgeExit(&craftCertOptions{ + nonce: []byte("rpc-retry"), + originNetwork: 0, + originTokenAddr: common.Address{}, + destNetwork: 0, + amount: big.NewInt(0), + }, 0), + }, + } + raw, err := json.Marshal(payload) + require.NoError(t, err) + + rpc := &stubCraftAggsenderRPC{ + stubAggsenderRPC: &stubAggsenderRPC{ + failHeights: map[uint64]bool{0: true}, + }, + certByHeight: map[uint64]*aggsendertypes.Certificate{ + 0: {SignedCertificate: ptrString(string(raw))}, + }, + headerErrsRemaining: map[uint64]int{0: 2}, + } + + exits, err := getStoredBridgeExitsForHeight(&Env{AggsenderRPC: rpc}, nil, 0) + require.NoError(t, err) + require.Len(t, exits, 1) +} + +func TestCallCraftCertRPCWithTimeout_ReturnsResult(t *testing.T) { + t.Parallel() + + value, err := callCraftCertRPCWithTimeout(func() (int, error) { + return 7, nil + }) + require.NoError(t, err) + require.Equal(t, 7, value) +} + +func TestCallCraftCertRPCWithTimeout_TimesOut(t *testing.T) { + t.Parallel() + + start := time.Now() + _, err := callCraftCertRPCWithTimeout(func() (int, error) { + time.Sleep(craftCertRPCRequestTimeout + 200*time.Millisecond) + return 0, nil + }) + require.ErrorContains(t, err, "aggsender RPC request timed out") + require.Less(t, time.Since(start), craftCertRPCRequestTimeout+time.Second) +} + +type stubCraftAggsenderRPC struct { + *stubAggsenderRPC + certByHeight map[uint64]*aggsendertypes.Certificate + headerErrsRemaining map[uint64]int +} + +func (s 
*stubCraftAggsenderRPC) GetCertificateHeaderPerHeight(height *uint64) (*aggsendertypes.Certificate, error) { + if s.headerErrsRemaining != nil && s.headerErrsRemaining[*height] > 0 { + s.headerErrsRemaining[*height]-- + return nil, errors.New("invalid status code, expected: 200, found: 429") + } + return s.certByHeight[*height], nil +} + +type stubCraftCertStore struct { + certs map[uint64]*aggsendertypes.Certificate + headers map[uint64]*aggsendertypes.CertificateHeader +} + +func (s *stubCraftCertStore) GetCertificateByHeight(height uint64) (*aggsendertypes.Certificate, error) { + return s.certs[height], nil +} + +func (s *stubCraftCertStore) GetCertificateHeaderByHeight(height uint64) (*aggsendertypes.CertificateHeader, error) { + return s.headers[height], nil +} + +func ptrString(v string) *string { return &v } + +type stubHashSigner struct { + key *ecdsa.PrivateKey +} + +func (s *stubHashSigner) SignHash(_ context.Context, hash common.Hash) ([]byte, error) { + return crypto.Sign(hash.Bytes(), s.key) +} + +func TestResolveCraftCertSignerConfig_FromCLI(t *testing.T) { + t.Parallel() + + app := cli.NewApp() + set := flagSetForCraftCert(t, + "--signer-key-path", "/tmp/sequencer.keystore", + "--signer-key-password", "secret", + ) + ctx := cli.NewContext(app, set, nil) + + cfg, err := resolveCraftCertSignerConfig(&Config{}, ctx) + require.NoError(t, err) + require.Equal(t, signer.NewLocalSignerConfig("/tmp/sequencer.keystore", "secret"), cfg) +} + +func TestResolveCraftCertSignerConfig_FromAggsenderConfig(t *testing.T) { + t.Parallel() + + app := cli.NewApp() + ctx := cli.NewContext(app, flagSetForCraftCert(t), nil) + expected := signertypes.SignerConfig{ + Method: signertypes.MethodGCPKMS, + Config: map[string]any{"KeyName": "projects/p/locations/l/keyRings/r/cryptoKeys/k/cryptoKeyVersions/1"}, + } + + cfg, err := resolveCraftCertSignerConfig(&Config{ + AggSender: CraftCertAggsenderConfig{ + AggsenderPrivateKey: expected, + }, + }, ctx) + require.NoError(t, err) + 
require.Equal(t, expected, cfg) +} + +func TestResolveCraftCertSignerConfig_Missing(t *testing.T) { + t.Parallel() + + app := cli.NewApp() + ctx := cli.NewContext(app, flagSetForCraftCert(t), nil) + + _, err := resolveCraftCertSignerConfig(&Config{}, ctx) + require.Error(t, err) + require.Contains(t, err.Error(), "AggSender.AggsenderPrivateKey") +} + +func flagSetForCraftCert(t *testing.T, args ...string) *flag.FlagSet { + t.Helper() + + set := flag.NewFlagSet("craft-cert", flag.ContinueOnError) + set.String("signer-key-path", "", "") + set.String("signer-key-password", "", "") + require.NoError(t, set.Parse(args)) + return set +} diff --git a/tools/backward_forward_let/diagnosis.go b/tools/backward_forward_let/diagnosis.go index 60e28318d..d1cc8f105 100644 --- a/tools/backward_forward_let/diagnosis.go +++ b/tools/backward_forward_let/diagnosis.go @@ -285,7 +285,7 @@ func collectExtraL2Bridges( br, err := env.BridgeService.GetBridgeByDepositCount(ctx, env.L2NetworkID, dc) if err != nil { if isNotFound(err) { - continue + return nil, fmt.Errorf("get L2 bridge at DC=%d: not indexed yet", dc) } return nil, fmt.Errorf("get L2 bridge at DC=%d: %w", dc, err) } @@ -358,13 +358,13 @@ func PrintDiagnosis(w io.Writer, result *DiagnosisResult) { fmt.Fprintln(w) } - if result.Case == NoDivergence { - fmt.Fprintln(w, "Case: NoDivergence — L1 settled state and L2 on-chain state are in sync.") + if result.AggsenderAPIFailed { + printMissingCertReport(w, result) return } - if result.AggsenderAPIFailed { - printMissingCertReport(w, result) + if result.IsCompleteNoDivergence() { + fmt.Fprintln(w, "Case: NoDivergence — L1 settled state and L2 on-chain state are in sync.") return } diff --git a/tools/backward_forward_let/diagnosis_test.go b/tools/backward_forward_let/diagnosis_test.go index ac9ae912c..2b02307f8 100644 --- a/tools/backward_forward_let/diagnosis_test.go +++ b/tools/backward_forward_let/diagnosis_test.go @@ -241,6 +241,15 @@ func TestPrintDiagnosis_NoDivergence(t 
*testing.T) { require.Contains(t, buf.String(), "NoDivergence") } +func TestDiagnosisResult_IsCompleteNoDivergence(t *testing.T) { + t.Parallel() + + require.True(t, (&DiagnosisResult{Case: NoDivergence}).IsCompleteNoDivergence()) + require.False(t, (&DiagnosisResult{Case: NoDivergence, AggsenderAPIFailed: true}).IsCompleteNoDivergence()) + require.False(t, (&DiagnosisResult{Case: Case1}).IsCompleteNoDivergence()) + require.False(t, (*DiagnosisResult)(nil).IsCompleteNoDivergence()) +} + // TestPrintDiagnosis_AggsenderAPIFailed verifies the actionable missing-cert output // when all cert IDs are resolved (no UNKNOWN entries). func TestPrintDiagnosis_AggsenderAPIFailed(t *testing.T) { @@ -273,6 +282,30 @@ func TestPrintDiagnosis_AggsenderAPIFailed(t *testing.T) { require.NotContains(t, output, "certificate_per_network_cf") } +func TestPrintDiagnosis_AggsenderAPIFailed_PartialResultDoesNotPrintNoDivergence(t *testing.T) { + t.Parallel() + + result := &DiagnosisResult{ + Case: NoDivergence, + AggsenderAPIFailed: true, + L1SettledLER: common.HexToHash("0x1111"), + L1SettledDepositCount: 4, + L2CurrentLER: common.HexToHash("0x2222"), + L2CurrentDepositCount: 3, + MissingCerts: []MissingCertInfo{ + {Height: 1150, CertID: common.HexToHash("0x3333"), CertIDResolved: true}, + }, + } + + var buf bytes.Buffer + PrintDiagnosis(&buf, result) + output := buf.String() + + require.Contains(t, output, "Aggsender RPC returned no bridge exit data") + require.NotContains(t, output, "Case: NoDivergence") + require.NotContains(t, output, "Nothing to do") +} + // TestPrintDiagnosis_AggsenderAPIFailed_WithUnknownCertID verifies that the extra // UNKNOWN note is printed when one or more cert IDs could not be resolved. func TestPrintDiagnosis_AggsenderAPIFailed_WithUnknownCertID(t *testing.T) { @@ -755,7 +788,7 @@ func TestCollectExtraL2Bridges_HappyPath(t *testing.T) { require.Len(t, extra, 2) } -// TestCollectExtraL2Bridges_NotFound verifies that NotFound entries are skipped. 
+// TestCollectExtraL2Bridges_NotFound verifies that missing bridge-service entries fail fast. func TestCollectExtraL2Bridges_NotFound(t *testing.T) { t.Parallel() @@ -766,15 +799,16 @@ func TestCollectExtraL2Bridges_NotFound(t *testing.T) { BridgeService: &stubBridgeService{ bridges: map[uint32]*bridgeservicetypes.BridgeResponse{ 3: br3, - // DC 4 is absent → returns ErrNotFound → skipped + // DC 4 is absent → returns ErrNotFound → fail fast }, }, L2NetworkID: 1, } - extra, err := collectExtraL2Bridges(context.Background(), env, 3, 5) - require.NoError(t, err) - require.Len(t, extra, 1) + _, err := collectExtraL2Bridges(context.Background(), env, 3, 5) + require.Error(t, err) + require.Contains(t, err.Error(), "DC=4") + require.Contains(t, err.Error(), "not indexed yet") } // TestCollectExtraL2Bridges_ServiceError verifies a non-NotFound error is propagated. diff --git a/tools/backward_forward_let/run.go b/tools/backward_forward_let/run.go index e14e57b5d..c4f0ebcb9 100644 --- a/tools/backward_forward_let/run.go +++ b/tools/backward_forward_let/run.go @@ -174,7 +174,7 @@ func Run(c *cli.Context) error { PrintDiagnosis(os.Stdout, diagnosis) - if diagnosis.Case == NoDivergence { + if diagnosis.IsCompleteNoDivergence() { fmt.Println("Nothing to do: L1 settled state and L2 on-chain state are in sync.") return nil } diff --git a/tools/backward_forward_let/send_cert.go b/tools/backward_forward_let/send_cert.go index b29d9ce0a..dc6a22f87 100644 --- a/tools/backward_forward_let/send_cert.go +++ b/tools/backward_forward_let/send_cert.go @@ -34,7 +34,7 @@ type certStorager interface { // RunSendCert is the CLI action for the send-cert subcommand. // It reads a certificate from JSON (--cert-json or --cert-file), sends it to the agglayer, -// and stores it in the aggsender SQLite DB. +// and optionally stores it in the aggsender SQLite DB. func RunSendCert(c *cli.Context) error { // Load config. 
cfg, err := LoadConfig(c) @@ -59,11 +59,13 @@ func RunSendCert(c *cli.Context) error { return fmt.Errorf("create agglayer client: %w", err) } - // Open aggsender DB. - dbPath := c.String("db-path") - storage, err := openAggsenderStorage(logger, dbPath) - if err != nil { - return err + var storage certStorager + if !c.Bool("no-db") { + dbPath := c.String("db-path") + storage, err = openAggsenderStorage(logger, dbPath) + if err != nil { + return err + } } return sendCertificate(c.Context, cert, certJSON, agglayerClient, storage) @@ -85,6 +87,11 @@ func sendCertificate( } fmt.Printf("Certificate sent. Hash: %s\n", certHash.Hex()) + if storage == nil { + fmt.Println("Skipping aggsender DB storage (--no-db).") + return nil + } + // Derive FromBlock from the previous certificate so that aggsender's retry // verification (verifyRetryCertStartingBlock) passes when this cert goes InError. // getLastSentBlockAndRetryCount computes: lastSentBlock = cert.FromBlock - 1 (if > 0), diff --git a/tools/backward_forward_let/send_cert_test.go b/tools/backward_forward_let/send_cert_test.go index 1f20d503f..3a08c645b 100644 --- a/tools/backward_forward_let/send_cert_test.go +++ b/tools/backward_forward_let/send_cert_test.go @@ -162,6 +162,20 @@ func TestSendCertificate_AgglayerError(t *testing.T) { require.Nil(t, storage.saved) } +func TestSendCertificate_NoDB(t *testing.T) { + t.Parallel() + + expectedHash := common.HexToHash("0xbeef") + sender := &stubAgglayerSender{hash: expectedHash} + + certJSON := minimalCertJSON(4) + var cert agglayertypes.Certificate + require.NoError(t, cert.UnmarshalJSON([]byte(certJSON))) + + err := sendCertificate(context.Background(), cert, certJSON, sender, nil) + require.NoError(t, err) +} + func TestSendCertificate_DBError(t *testing.T) { t.Parallel() @@ -287,6 +301,7 @@ func newSendCertCLIContext(flags map[string]string) *cli.Context { fs.String("cert-json", "", "") fs.String("cert-file", "", "") fs.String("db-path", "", "") + fs.Bool("no-db", false, 
"") for name, val := range flags { _ = fs.Set(name, val) } @@ -313,6 +328,7 @@ func TestRunSendCert_LoadConfigError(t *testing.T) { &cli.StringFlag{Name: "cert-json"}, &cli.StringFlag{Name: "cert-file"}, &cli.StringFlag{Name: "db-path"}, + &cli.BoolFlag{Name: "no-db"}, }, }, } diff --git a/tools/backward_forward_let/types.go b/tools/backward_forward_let/types.go index 4266416aa..237c9e5e1 100644 --- a/tools/backward_forward_let/types.go +++ b/tools/backward_forward_let/types.go @@ -80,6 +80,12 @@ type DiagnosisResult struct { FailedCertID common.Hash } +// IsCompleteNoDivergence reports whether diagnosis completed successfully and +// confirmed there is no divergence between settled L1 state and the L2 bridge. +func (d *DiagnosisResult) IsCompleteNoDivergence() bool { + return d != nil && !d.AggsenderAPIFailed && d.Case == NoDivergence +} + // MissingCertInfo describes a certificate height for which bridge exits // could not be obtained from any available source. type MissingCertInfo struct {