44 changes: 41 additions & 3 deletions premerge/advisor/advisor_lib.py
@@ -36,6 +36,7 @@ class TestExplanationRequest(TypedDict):
}

EXPLAINED_HEAD_MAX_COMMIT_INDEX_DIFFERENCE = 5
EXPLAINED_FLAKY_MIN_COMMIT_RANGE = 200


def _create_table(table_name: str, connection: sqlite3.Connection):
@@ -131,20 +132,57 @@ def _try_explain_failing_at_head(
    return None


def _try_explain_flaky_failure(
Contributor

Maybe a comment describing the particular definition of "flaky" this function is using would be handy?

It looks to me like it's testing if this failure has been seen before over at least a 200 commit range between oldest/newest sighting? What's to stop that being "it's been failing at HEAD for at least 200 revisions"?

Contributor Author

Added a function level docstring.

I've never seen a non-flaky test failure that has been failing for more than ~20 commits. I think an OOM more in range should be safe. I thought about also adding a heuristic around the longest running subsequence to catch cases like this, but I think it adds complexity for little benefit:

  1. As mentioned, 200 is about an OOM bigger than the ranges we've seen in practice.
  2. We're exactly matching the failure message, so the test would need to fail in the exact same way to trigger this.

Contributor

I'd probably still go for a test that verifies the failures are discontiguous from ToT rather than a heuristic that guesses that something seems unlikely to be a long-standing failure at HEAD - seems cheap/simple enough to do. Because bad advice can be pretty confusing.

But I guess it's a start.
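
(Illustrative sketch only, not part of this change — one possible shape for the discontiguity-from-ToT check being discussed; the helper name, the `tot_index` parameter, and the assumption that a genuine failure at HEAD would show up as one unbroken run of failing commit indices reaching the tip are all hypothetical:)

```python
def _is_contiguous_run_at_tot(commit_indices: list[int], tot_index: int) -> bool:
    """Hypothetical helper: do the failing commit indices form one unbroken
    run that reaches the tip of tree? If so, the failure looks like a real
    breakage at HEAD rather than a flake, even if the range is wide."""
    indices = sorted(set(commit_indices))
    # Every adjacent pair of sightings must be exactly one commit apart.
    contiguous = all(b - a == 1 for a, b in zip(indices, indices[1:]))
    return contiguous and indices[-1] == tot_index
```

A real implementation would probably need to tolerate gaps for commits that never got a postcommit run, but this is the general shape of the check.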

Contributor Author

Yeah, I'll add in that functionality soon. It would be good to have it for the failing at head case too.

    db_connection: sqlite3.Connection,
    test_failure: TestFailure,
    platform: str,
) -> FailureExplanation | None:
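    """Tries to explain a test failure as a flaky failure.

    A failure is treated as flaky when the exact same failure message has been
    seen in postcommit runs on this platform across a commit range wider than
    EXPLAINED_FLAKY_MIN_COMMIT_RANGE.
    """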
    test_name_matches = db_connection.execute(
        "SELECT failure_message, commit_index FROM failures WHERE source_type='postcommit' AND platform=?",
        (platform,),
    ).fetchall()
    commit_indices = []
    for failure_message, commit_index in test_name_matches:
        if failure_message == test_failure["message"]:
            commit_indices.append(commit_index)
    if len(commit_indices) == 0:
        return None
    commit_range = max(commit_indices) - min(commit_indices)
    if commit_range > EXPLAINED_FLAKY_MIN_COMMIT_RANGE:
        return {
            "name": test_failure["name"],
            "explained": True,
            "reason": "This test is flaky in main.",
        }
    return None


def explain_failures(
    explanation_request: TestExplanationRequest,
    repository_path: str,
    db_connection: sqlite3.Connection,
) -> list[FailureExplanation]:
    explanations = []
    for test_failure in explanation_request["failures"]:
        commit_index = git_utils.get_commit_index(
            explanation_request["base_commit_sha"], repository_path, db_connection
        )
        # We want to try and explain flaky failures first. Otherwise we might
        # explain a flaky failure as a failure at head if there is a recent
        # failure in the last couple of commits.
        explained_as_flaky = _try_explain_flaky_failure(
            db_connection,
            test_failure,
            explanation_request["platform"],
        )
        if explained_as_flaky:
            explanations.append(explained_as_flaky)
            continue
        explained_at_head = _try_explain_failing_at_head(
            db_connection,
            test_failure,
            explanation_request["base_commit_sha"],
            commit_index,
            explanation_request["platform"],
        )
        if explained_at_head:
103 changes: 103 additions & 0 deletions premerge/advisor/advisor_lib_test.py
@@ -86,6 +86,9 @@ def setUp(self):
            [
                ("8d29a3bb6f3d92d65bf5811b53bf42bf63685359", 1),
                ("6b7064686b706f7064656d6f6e68756e74657273", 2),
                ("6a6f73687561747265656a6f7368756174726565", 201),
                ("6269677375726269677375726269677375726269", 202),
                ("6d746c616e676c65796d746c616e676c65796d74", 203),
            ],
        )
        self.repository_path_dir = tempfile.TemporaryDirectory()
@@ -280,3 +283,103 @@ def test_no_explain_different_message(self):
                }
            ],
        )

    def _setup_flaky_test_info(
        self,
        source_type="postcommit",
        message="failed in way 1",
        second_failure_sha="6269677375726269677375726269677375726269",
    ):
        failures_info = [
            {
                "source_type": source_type,
                "base_commit_sha": "8d29a3bb6f3d92d65bf5811b53bf42bf63685359",
                "source_id": "10000",
                "failures": [
                    {"name": "a.ll", "message": message},
                ],
                "platform": "linux-x86_64",
            },
            {
                "source_type": source_type,
                "base_commit_sha": second_failure_sha,
                "source_id": "100001",
                "failures": [
                    {"name": "a.ll", "message": message},
                ],
                "platform": "linux-x86_64",
            },
        ]
        for failure_info in failures_info:
            advisor_lib.upload_failures(
                failure_info, self.db_connection, self.repository_path
            )

    def _get_flaky_test_explanations(self):
        explanation_request = {
            "failures": [{"name": "a.ll", "message": "failed in way 1"}],
            "base_commit_sha": "6d746c616e676c65796d746c616e676c65796d74",
            "platform": "linux-x86_64",
        }
        return advisor_lib.explain_failures(
            explanation_request, self.repository_path, self.db_connection
        )

    def test_explain_flaky(self):
        self._setup_flaky_test_info()
        self.assertListEqual(
            self._get_flaky_test_explanations(),
            [
                {
                    "name": "a.ll",
                    "explained": True,
                    "reason": "This test is flaky in main.",
                }
            ],
        )

    def test_no_explain_flaky_different_message(self):
        self._setup_flaky_test_info(message="failed in way 2")
        self.assertListEqual(
            self._get_flaky_test_explanations(),
            [
                {
                    "name": "a.ll",
                    "explained": False,
                    "reason": None,
                }
            ],
        )

    # Test that we do not explain away flaky failures from pull request data.
    # PRs might have the same failures multiple times across a large span of
    # base commits, which might accidentally trigger the heuristic.
    def test_no_explain_flaky_pullrequest_data(self):
        self._setup_flaky_test_info(source_type="pull_request")
        self.assertListEqual(
            self._get_flaky_test_explanations(),
            [
                {
                    "name": "a.ll",
                    "explained": False,
                    "reason": None,
                }
            ],
        )

    # Test that if all of the flaky failures are within a small range, we do
    # not report this as a flaky failure.
    def test_no_explain_flaky_small_range(self):
        self._setup_flaky_test_info(
            second_failure_sha="6b7064686b706f7064656d6f6e68756e74657273"
        )
        self.assertListEqual(
            self._get_flaky_test_explanations(),
            [
                {
                    "name": "a.ll",
                    "explained": False,
                    "reason": None,
                }
            ],
        )