18 changes: 15 additions & 3 deletions api/main.py
@@ -1,8 +1,9 @@
# SPDX-License-Identifier: LGPL-2.1-or-later
#
# Copyright (C) 2021-2023 Collabora Limited
# Copyright (C) 2021-2025 Collabora Limited
# Author: Guillaume Tucker <[email protected]>
# Author: Jeny Sadadia <[email protected]>
# Author: Denys Fedoryshchenko <[email protected]>

# pylint: disable=unused-argument,global-statement,too-many-lines

@@ -29,14 +30,13 @@
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse, PlainTextResponse, FileResponse
from fastapi.security import OAuth2PasswordRequestForm
from fastapi_pagination import add_pagination, pagination_ctx
from fastapi_pagination import add_pagination
from fastapi_versioning import VersionedFastAPI
from bson import ObjectId, errors
from pymongo.errors import DuplicateKeyError
from fastapi_users import FastAPIUsers
from beanie import PydanticObjectId
from pydantic import BaseModel
from kernelci.api.models import (
    Node,
    Hierarchy,
    PublishEvent,
@@ -429,6 +429,13 @@
        query_params['data.result'] = result
    if limit:
        query_params['limit'] = int(limit)
    # Recursive queries must set an explicit limit, capped at 1000
    if recursive and (not limit or int(limit) > 1000):
        # Reject the request outright rather than silently truncating
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Recursive limit is too large, max is 1000"
        )
    resp = await db.find_by_attributes_nonpaginated(EventHistory, query_params)
    resp_list = []
    for item in resp:
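For reviewers, a client-side sketch of how this guard behaves; the endpoint path and base URL below are placeholders for illustration, not the API's documented surface:

```python
import requests

# Hypothetical endpoint and base URL, for illustration only.
resp = requests.get(
    "https://api.example.org/latest/events",
    params={"recursive": "true", "limit": 5000},  # exceeds the 1000 cap
)
assert resp.status_code == 400
print(resp.json()["detail"])  # "Recursive limit is too large, max is 1000"
```

Omitting `limit` entirely on a recursive query is rejected the same way, so callers must always pass an explicit bound.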
@@ -556,6 +563,11 @@
            timeout=15
        )
        return resp
    except asyncio.TimeoutError as error:
        raise HTTPException(
            status_code=status.HTTP_504_GATEWAY_TIMEOUT,
            detail=f"Timeout while fetching nodes: {str(error)}"
        ) from error
    except KeyError as error:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
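The new `asyncio.TimeoutError` handler maps an internal timeout onto HTTP 504 instead of letting it surface as a generic 500. A self-contained sketch of the same pattern, with a stand-in endpoint and delays:

```python
import asyncio

from fastapi import FastAPI, HTTPException, status

app = FastAPI()


@app.get("/slow")
async def slow():
    try:
        # Stand-in for the real node fetch; sleeps past the 15s budget.
        result = await asyncio.wait_for(
            asyncio.sleep(30, result={"ok": True}), timeout=15
        )
        return result
    except asyncio.TimeoutError as error:
        raise HTTPException(
            status_code=status.HTTP_504_GATEWAY_TIMEOUT,
            detail=f"Timeout while fetching nodes: {str(error)}",
        ) from error
```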
48 changes: 45 additions & 3 deletions api/maintenance.py
@@ -1,14 +1,45 @@
from pymongo import MongoClient
# SPDX-License-Identifier: LGPL-2.1-or-later
#
# Copyright (C) 2021-2025 Collabora Limited
# Author: Guillaume Tucker <[email protected]>
# Author: Jeny Sadadia <[email protected]>
# Author: Denys Fedoryshchenko <[email protected]>

"""
This module provides maintenance utilities for the KernelCI API, including
functions to purge old nodes from the database and manage MongoDB connections.
"""
import datetime
import os
from pymongo import MongoClient


def purge_ids(db, collection, ids):
    """
    Delete documents from the specified collection in the database
    by their IDs.

    Args:
        db: The MongoDB database instance.
        collection (str): The name of the collection to purge from.
        ids (list): List of document IDs to delete.
    """
    print("Purging", len(ids), "from", collection)
    db[collection].delete_many({"_id": {"$in": ids}})
    db[collection].delete_many({
        "_id": {"$in": ids}
    })
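A hypothetical usage sketch for `purge_ids`; the connection URI and object ID are placeholders:

```python
from bson import ObjectId
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017")  # placeholder URI
db = client["kernelci"]
stale_ids = [ObjectId("64b1f0a2e4b0c1a2b3c4d5e6")]  # placeholder ID
purge_ids(db, "nodes", stale_ids)
```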


def connect_to_db():
    """
    Connect to the MongoDB database using the MONGO_SERVICE environment
    variable.

    Returns:
        db: The 'kernelci' MongoDB database instance.
    Raises:
        ValueError: If the MONGO_SERVICE environment variable is not set.
    """
    mongo_service = os.environ.get("MONGO_SERVICE")
    if not mongo_service:
        raise ValueError("MONGO_SERVICE environment variable is not set")
@@ -18,9 +49,20 @@ def connect_to_db():


async def purge_old_nodes(age_days=180):
    """
    Purge nodes from the 'nodes' collection that are older than the
    specified number of days.

    Args:
        age_days (int, optional): The age in days to use as the
            threshold for deletion. Defaults to 180.
    """
    date_end = datetime.datetime.today() - datetime.timedelta(days=age_days)
    db = connect_to_db()
    nodes = db["nodes"].find({"created": {"$lt": date_end}})
    nodes = db["nodes"].find({
        "created": {"$lt": date_end}
    })
    # Delete nodes in chunks of 1000 so the main thread is not
    # blocked for too long
    del_batch = []
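The rest of the loop is collapsed in the diff view; a sketch of the chunked-deletion pattern the comment above describes, with the final flush assumed:

```python
del_batch = []
for node in nodes:
    del_batch.append(node["_id"])
    if len(del_batch) >= 1000:
        purge_ids(db, "nodes", del_batch)
        del_batch = []
if del_batch:
    # Flush the final partial batch.
    purge_ids(db, "nodes", del_batch)
```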