Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 185 additions & 0 deletions internal/cbm/service_patterns.c
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,191 @@ static const lib_pattern_t *match_qn(const char *qn, const lib_pattern_t *patter
return NULL;
}

/* Report whether the first component of an absolute path equals `segment`.
 *
 * The path must begin with '/'. The text immediately after that slash has to
 * match `segment` exactly and be followed by either the end of the string or
 * another '/'. A NULL path, a NULL segment, or a path that does not start
 * with '/' yields false. */
static bool starts_with_segment(const char *path, const char *segment) {
    if (path == NULL) {
        return false;
    }
    if (path[0] != '/' || segment == NULL) {
        return false;
    }

    const char *first = path + 1;
    const size_t n = strlen(segment);
    if (strncmp(first, segment, n) != 0) {
        return false;
    }

    /* A successful strncmp guarantees first[n] is within the string. */
    const char after = first[n];
    return after == '/' || after == '\0';
}

/* Report whether any '/'-preceded component of `path` equals `segment`.
 *
 * Only components that directly follow a '/' are examined, so a leading
 * component with no slash in front of it is never matched. A component
 * matches when it equals `segment` and is terminated by '/' or the end of
 * the string. NULL arguments yield false. */
static bool contains_segment(const char *path, const char *segment) {
    if (path == NULL || segment == NULL) {
        return false;
    }

    const size_t n = strlen(segment);
    for (const char *slash = strchr(path, '/'); slash != NULL; slash = strchr(slash + 1, '/')) {
        const char *component = slash + 1;
        if (strncmp(component, segment, n) != 0) {
            continue;
        }
        if (component[n] == '/' || component[n] == '\0') {
            return true;
        }
    }
    return false;
}

/* True only for the ASCII decimal digits '0' through '9'. */
static bool is_digit_char(char ch) {
    return !(ch < '0' || ch > '9');
}

/* Report whether `path` begins with a component that marks it as an HTTP
 * route: "api", "apis", "graphql", "health", "metrics", or a single-digit
 * version component such as "/v1" or "/v2/...". A NULL path yields false. */
static bool has_http_route_marker(const char *path) {
    static const char *const route_roots[] = {"api", "apis", "graphql", "health", "metrics"};
    const size_t n_roots = sizeof(route_roots) / sizeof(route_roots[0]);

    for (size_t i = 0; i < n_roots; i++) {
        if (starts_with_segment(path, route_roots[i])) {
            return true;
        }
    }

    /* Version prefix: '/', 'v', exactly one digit, then '/' or end of string.
     * Each index is only read after the previous character proved non-NUL. */
    if (!path || path[0] != '/' || path[1] != 'v') {
        return false;
    }
    if (!is_digit_char(path[2])) {
        return false;
    }
    return path[3] == '/' || path[3] == '\0';
}

/* Report whether the first component of `path` is one of the well-known
 * top-level filesystem directories listed below (e.g. "/etc/...", "/var",
 * "/Users/..."). Matching is delegated to starts_with_segment(). */
static bool has_filesystem_root(const char *path) {
    static const char *const fs_roots[] = {"etc",     "root", "var", "usr",   "home", "tmp",
                                           "private", "opt",  "bin", "sbin",  "dev",  "proc",
                                           "sys",     "run",  "lib", "lib64", "mnt",  "media",
                                           "boot",    "srv",  "Users", "Volumes"};
    const size_t n_roots = sizeof(fs_roots) / sizeof(fs_roots[0]);

    for (size_t idx = 0; idx < n_roots; idx++) {
        if (starts_with_segment(path, fs_roots[idx])) {
            return true;
        }
    }
    return false;
}

/* Report whether any '/'-preceded component of `path` is one of the hidden
 * configuration names listed below (".aws", ".ssh", ".git", ...). Matching
 * is delegated to contains_segment(). */
static bool has_hidden_config_segment(const char *path) {
    static const char *const hidden_names[] = {".aws", ".azure", ".config", ".docker", ".env",
                                               ".git", ".gnupg", ".kube",   ".ssh",    NULL};

    /* Walk the NULL-terminated table. */
    const char *const *name = hidden_names;
    while (*name != NULL) {
        if (contains_segment(path, *name)) {
            return true;
        }
        name++;
    }
    return false;
}

/* NULL-tolerant, case-sensitive equality check between an extracted extension
 * and a wanted extension. Either argument being NULL yields false. */
static bool path_ext_matches(const char *ext, const char *wanted) {
    if (ext == NULL || wanted == NULL) {
        return false;
    }
    return strcmp(ext, wanted) == 0;
}

/* Decide whether the last component of `path` carries a file extension that
 * marks it as a filesystem path rather than an HTTP route.
 *
 * Anything from the first '?' or '#' onward is ignored. The extension is the
 * text from the last '.' in the final path component up to that cut-off; a
 * component with no '.', or one ending in '.', has no extension.
 *
 * Extensions in `hard_file_exts` always count. ".json", ".yaml", ".yml" and
 * ".xml" count only when the path does not start with an HTTP route marker
 * (see has_http_route_marker()), since API endpoints commonly end in them.
 *
 * Returns false for a NULL path and for extensions of 32 bytes or more
 * (including the dot), which cannot match any entry. Comparison is
 * case-sensitive. */
static bool has_filesystem_extension(const char *path) {
    if (!path) {
        return false;
    }

    /* Cut the path off at the query string or fragment, if any. */
    const char *end = strpbrk(path, "?#");
    if (!end) {
        end = path + strlen(path);
    }

    /* Start of the last component: one past the final '/', or the start of
     * the string when there is no '/'. Tracking the component start (rather
     * than the slash itself) keeps the first character of a slash-less path
     * such as ".env" inside the scan below. */
    const char *base = path;
    for (const char *p = path; p < end; p++) {
        if (*p == '/') {
            base = p + 1;
        }
    }

    /* Last '.' within the final component. */
    const char *dot = NULL;
    for (const char *p = base; p < end; p++) {
        if (*p == '.') {
            dot = p;
        }
    }
    if (!dot || dot + 1 == end) {
        return false;
    }

    /* Copy the extension (dot included) so it can be compared as a C string. */
    char ext[32];
    size_t ext_len = (size_t)(end - dot);
    if (ext_len >= sizeof(ext)) {
        return false;
    }
    memcpy(ext, dot, ext_len);
    ext[ext_len] = '\0';

    static const char *const hard_file_exts[] = {
        ".cfg", ".conf", ".credentials", ".crt",  ".db",         ".env",
        ".ini", ".key",  ".pem",         ".pid",  ".properties", ".service",
        ".sock", ".socket", ".sqlite",   ".toml", NULL};
    for (int i = 0; hard_file_exts[i]; i++) {
        if (path_ext_matches(ext, hard_file_exts[i])) {
            return true;
        }
    }

    /* Data-format extensions are ambiguous: treat them as files only when the
     * path does not look like an API route. */
    if ((path_ext_matches(ext, ".json") || path_ext_matches(ext, ".yaml") ||
         path_ext_matches(ext, ".yml") || path_ext_matches(ext, ".xml")) &&
        !has_http_route_marker(path)) {
        return true;
    }
    return false;
}

/* Report whether `callee_name` names a string-splitting/joining helper or a
 * filesystem path builder, i.e. a call whose "/..." string argument is a
 * delimiter or path fragment rather than an HTTP route.
 *
 * The method name is the text after the LAST '.' or "::" separator in the
 * callee, whichever occurs later; a separator with nothing after it is
 * ignored. The callee matches when that method name is "split", "rsplit",
 * "partition" or "join", or when the full callee contains "path.join"
 * (which also covers "os.path.join").
 *
 * Returns false for a NULL callee. */
static bool callee_is_delimiter_or_filesystem_builder(const char *callee_name) {
    if (!callee_name) {
        return false;
    }

    const char *method = callee_name;

    const char *last_dot = strrchr(callee_name, '.');
    if (last_dot && last_dot[1]) {
        method = last_dot + 1;
    }

    /* strstr() finds the first "::"; keep searching to reach the last one. */
    const char *last_colon = NULL;
    for (const char *p = strstr(callee_name, "::"); p != NULL; p = strstr(p + 1, "::")) {
        last_colon = p;
    }
    /* Only let "::" win when it lies after the '.' already chosen, so that
     * "std::str.split" still resolves to "split". */
    if (last_colon && last_colon[2] && last_colon + 2 > method) {
        method = last_colon + 2;
    }

    if (strcmp(method, "split") == 0 || strcmp(method, "rsplit") == 0 ||
        strcmp(method, "partition") == 0 || strcmp(method, "join") == 0) {
        return true;
    }

    /* "os.path.join" contains "path.join", so one substring test covers both. */
    return strstr(callee_name, "path.join") != NULL;
}

/* Copy `literal` into `buf` with surrounding whitespace and one pair of
 * matching quotes removed.
 *
 * Leading and trailing spaces, tabs, newlines and carriage returns are
 * trimmed first. If what remains is at least two characters long and starts
 * and ends with the same quote character (", ' or `), that pair is dropped.
 * Whitespace inside the quotes is kept.
 *
 * Returns `buf` (NUL-terminated) on success. Returns NULL when `literal` is
 * NULL or empty, when nothing is left after stripping, or when the result
 * plus its terminator does not fit in `buf_sz` bytes. */
static const char *strip_string_delimiters(const char *literal, char *buf, size_t buf_sz) {
    static const char blanks[] = " \t\n\r";

    if (literal == NULL || *literal == '\0') {
        return NULL;
    }

    /* Trim leading blanks, then shrink the length past trailing blanks. */
    const char *first = literal + strspn(literal, blanks);
    size_t n = strlen(first);
    /* first[n - 1] is never NUL here, so strchr cannot match the terminator. */
    while (n > 0 && strchr(blanks, first[n - 1]) != NULL) {
        n--;
    }

    /* Drop one pair of matching quote characters. */
    if (n >= 2) {
        const char quote = first[0];
        const bool is_quote = quote == '"' || quote == '\'' || quote == '`';
        if (is_quote && first[n - 1] == quote) {
            first += 1;
            n -= 2;
        }
    }

    if (n == 0 || n >= buf_sz) {
        return NULL;
    }
    memcpy(buf, first, n);
    buf[n] = '\0';
    return buf;
}

/* Decide whether a string literal passed to `callee_name` plausibly denotes
 * an HTTP route or URL, as opposed to a filesystem path or a delimiter.
 *
 * `literal` may still carry surrounding whitespace and quotes; they are
 * stripped first. After stripping:
 *   - NULL, empty, or over-long (>= 1024 bytes) input is rejected;
 *   - anything starting with "http://" or "https://" is accepted, regardless
 *     of the callee;
 *   - any other string containing "://" is rejected;
 *   - a string not starting with '/' is rejected;
 *   - a callee that is a split/join/path-building helper is rejected;
 *   - a path with a filesystem root, a hidden config segment, or a
 *     filesystem file extension is rejected;
 *   - everything else is accepted.
 *
 * `callee_name` may be NULL. */
bool cbm_service_pattern_is_http_route_literal(const char *literal, const char *callee_name) {
    char path_buf[1024];
    const char *path = strip_string_delimiters(literal, path_buf, sizeof(path_buf));
    if (path == NULL || path[0] == '\0') {
        return false;
    }

    const bool is_http_url =
        strncmp(path, "http://", 7) == 0 || strncmp(path, "https://", 8) == 0;
    if (is_http_url) {
        return true;
    }

    /* From here on only rooted, scheme-less paths are candidates. */
    const bool is_rooted_path = strstr(path, "://") == NULL && path[0] == '/';
    if (!is_rooted_path) {
        return false;
    }
    if (callee_is_delimiter_or_filesystem_builder(callee_name)) {
        return false;
    }

    const bool looks_like_file = has_filesystem_root(path) || has_hidden_config_segment(path) ||
                                 has_filesystem_extension(path);
    return !looks_like_file;
}

/* ── Public API ────────────────────────────────────────────────── */

/* Per-worker TLS cache of cbm_service_pattern_match results.
Expand Down
21 changes: 21 additions & 0 deletions src/pipeline/pass_route_nodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ enum {
#include <stdio.h>
#include <string.h>

/* Route-literal guard defined in internal/cbm/service_patterns.c.
 * NOTE(review): this is a local prototype; consider moving it into a shared
 * header so the declaration and definition cannot drift apart. */
bool cbm_service_pattern_is_http_route_literal(const char *literal, const char *callee_name);

/* Extract a JSON string value by key from properties.
* Returns pointer into buf (caller provides buffer). NULL if not found. */
static const char *json_extract(const char *json, const char *key, char *buf, int bufsz) {
Expand Down Expand Up @@ -84,6 +86,13 @@ static void route_edge_visitor(const cbm_gbuf_edge_t *edge, void *userdata) {
if (!url || !url[0]) {
return;
}
char callee_buf[CBM_SZ_256];
const char *callee =
json_extract(edge->properties_json, "callee", callee_buf, sizeof(callee_buf));
if (strcmp(edge->type, "HTTP_CALLS") == 0 &&
!cbm_service_pattern_is_http_route_literal(url, callee)) {
return;
}

/* Extract method or broker */
char method_buf[CBM_SZ_16];
Expand Down Expand Up @@ -573,6 +582,15 @@ typedef struct {
const char *edge_type;
} caller_edge_ref_t;

/* Check whether an edge's "url_path" property passes the HTTP route-literal
 * guard, taking the edge's "callee" property into account. Both values are
 * read from the edge's JSON properties; a missing value is forwarded as
 * whatever json_extract() returns for it. */
static bool http_call_edge_has_valid_route(const cbm_gbuf_edge_t *edge) {
    char url_buf[CBM_SZ_512];
    char callee_buf[CBM_SZ_256];

    const char *url = json_extract(edge->properties_json, "url_path", url_buf, sizeof(url_buf));
    const char *callee =
        json_extract(edge->properties_json, "callee", callee_buf, sizeof(callee_buf));

    const bool is_route = cbm_service_pattern_is_http_route_literal(url, callee);
    return is_route;
}

/* Try to create a DATA_FLOWS edge between caller and handler via a route.
* Returns: 1=created, 0=skipped (self/duplicate), -1=skipped (has direct call). */
static int try_create_data_flow(cbm_gbuf_t *gb, int64_t caller_id, int64_t handler_id,
Expand Down Expand Up @@ -655,6 +673,9 @@ static int collect_caller_edges(cbm_gbuf_t *gb, int64_t route_id, caller_edge_re
int http_count = 0;
cbm_gbuf_find_edges_by_target_type(gb, route_id, "HTTP_CALLS", &http_edges, &http_count);
for (int i = 0; i < http_count && n < max_out; i++) {
if (!http_call_edge_has_valid_route(http_edges[i])) {
continue;
}
out[n].source_id = http_edges[i]->source_id;
out[n].props = http_edges[i]->properties_json;
out[n].edge_type = "HTTP_CALLS";
Expand Down
124 changes: 115 additions & 9 deletions tests/test_infrascan.c
Original file line number Diff line number Diff line change
@@ -1,13 +1,119 @@
/*
 * test_infrascan.c — tests for the HTTP route-literal guard and its effect
 * on route-node creation and HTTP_CALLS → DATA_FLOWS joining.
 *
 * The remaining infrascan tests live in test_pipeline.c
 * (infra_parse_dockerfile_*, infra_parse_dotenv*, infra_parse_shell*,
 * infra_parse_terraform*, infra_is_*, infra_clean_json_brackets,
 * infra_secret_detection, infra_qn_helper).
 */
#include "test_framework.h"
#include "graph_buffer/graph_buffer.h"
#include "pipeline/pipeline_internal.h"

#include <stdbool.h>
#include <stdint.h>

/* Function under test, defined in internal/cbm/service_patterns.c.
 * NOTE(review): local prototype; prefer including a shared header if one
 * declares this function. */
bool cbm_service_pattern_is_http_route_literal(const char *literal, const char *callee_name);

/* Test helper: returns 1 if the graph buffer holds a "DATA_FLOWS" edge from
 * `source_id` to `target_id`, 0 otherwise. */
static int has_data_flow(cbm_gbuf_t *gb, int64_t source_id, int64_t target_id) {
    const cbm_gbuf_edge_t **flows = NULL;
    int n_flows = 0;
    cbm_gbuf_find_edges_by_source_type(gb, source_id, "DATA_FLOWS", &flows, &n_flows);

    int found = 0;
    for (int idx = 0; idx < n_flows && !found; idx++) {
        found = (flows[idx]->target_id == target_id) ? 1 : 0;
    }
    return found;
}

/* Unit test for cbm_service_pattern_is_http_route_literal(): filesystem-looking
 * paths, delimiter/path-builder callees, and NULL/empty literals must be
 * rejected, while an API path and an absolute https URL must be accepted. */
TEST(infrascan_http_route_literal_guard_rejects_filesystem_paths) {
/* Rejected: filesystem root directories, a hidden config segment, and a
 * data-file extension outside any API prefix. */
ASSERT_FALSE(cbm_service_pattern_is_http_route_literal("/etc/crio/crio.conf", "requests.get"));
ASSERT_FALSE(
cbm_service_pattern_is_http_route_literal("/root/.aws/credentials", "requests.get"));
ASSERT_FALSE(cbm_service_pattern_is_http_route_literal("/var/run/app.json", "requests.get"));
/* Rejected because of the callee: the literal is a split delimiter or a
 * path fragment, even though "/api" alone would look like a route. */
ASSERT_FALSE(cbm_service_pattern_is_http_route_literal("/locations/", "str.split"));
ASSERT_FALSE(cbm_service_pattern_is_http_route_literal("/api", "os.path.join"));
/* Rejected: missing or empty literal. */
ASSERT_FALSE(cbm_service_pattern_is_http_route_literal(NULL, "requests.get"));
ASSERT_FALSE(cbm_service_pattern_is_http_route_literal("", "requests.get"));
/* Accepted: a rooted API path and a full https URL. */
ASSERT_TRUE(cbm_service_pattern_is_http_route_literal("/api/orders", "requests.get"));
ASSERT_TRUE(cbm_service_pattern_is_http_route_literal("https://orders.example/api/orders",
"requests.get"));
PASS();
}

/* Pipeline test: HTTP_CALLS edges whose url_path is a filesystem path, a
 * split delimiter, or missing altogether must not produce route nodes when
 * cbm_pipeline_create_route_nodes() runs. */
TEST(infrascan_route_nodes_skip_bad_http_url_paths) {
cbm_gbuf_t *gb = cbm_gbuf_new("test", "/tmp/cbm_infrascan_route_guard");
ASSERT_NOT_NULL(gb);
/* One caller function and three callee nodes, one per bad-edge scenario. */
int64_t caller =
cbm_gbuf_upsert_node(gb, "Function", "client", "test.client", "client.py", 1, 3, "{}");
int64_t fs_callee =
cbm_gbuf_upsert_node(gb, "Function", "requests.get", "requests.get", "", 0, 0, "{}");
int64_t split_callee =
cbm_gbuf_upsert_node(gb, "Function", "str.split", "str.split", "", 0, 0, "{}");
int64_t empty_callee =
cbm_gbuf_upsert_node(gb, "Function", "requests.post", "requests.post", "", 0, 0, "{}");
ASSERT_GT(caller, 0);
ASSERT_GT(fs_callee, 0);
ASSERT_GT(split_callee, 0);
ASSERT_GT(empty_callee, 0);

/* Scenario 1: url_path is a filesystem path. */
cbm_gbuf_insert_edge(gb, caller, fs_callee, "HTTP_CALLS",
"{\"callee\":\"requests.get\",\"url_path\":\"/etc/crio/crio.conf\","
"\"method\":\"GET\"}");
/* Scenario 2: the callee is a string split, so the literal is a delimiter. */
cbm_gbuf_insert_edge(gb, caller, split_callee, "HTTP_CALLS",
"{\"callee\":\"str.split\",\"url_path\":\"/locations/\","
"\"method\":\"ANY\"}");
/* Scenario 3: no url_path property at all.
 * NOTE(review): the target node is "requests.post" while the JSON callee is
 * "requests.get" — confirm the mismatch is intentional. */
cbm_gbuf_insert_edge(gb, caller, empty_callee, "HTTP_CALLS",
"{\"callee\":\"requests.get\",\"method\":\"GET\"}");

cbm_pipeline_create_route_nodes(gb);

/* None of the three edges may have produced a route node. */
ASSERT_NULL(cbm_gbuf_find_by_qn(gb, "__route__GET__/etc/crio/crio.conf"));
ASSERT_NULL(cbm_gbuf_find_by_qn(gb, "__route__ANY__/locations/"));
ASSERT_NULL(cbm_gbuf_find_by_qn(gb, "__route__GET__"));

cbm_gbuf_free(gb);
PASS();
}

/* Pipeline test: with Route nodes already present, an HTTP_CALLS edge with a
 * valid url_path must be joined to the route's handler via a DATA_FLOWS edge,
 * while an HTTP_CALLS edge whose url_path is a filesystem path must not be,
 * even though a Route node and handler exist for it. */
TEST(infrascan_http_calls_join_matching_handler_route) {
cbm_gbuf_t *gb = cbm_gbuf_new("test", "/tmp/cbm_infrascan_route_join");
ASSERT_NOT_NULL(gb);

/* Good triple: route /api/orders, its handler, and a client calling it. */
int64_t route = cbm_gbuf_upsert_node(gb, "Route", "/api/orders", "__route__GET__/api/orders",
"", 0, 0, "{\"method\":\"GET\"}");
int64_t handler = cbm_gbuf_upsert_node(gb, "Function", "get_orders", "test.get_orders",
"server.py", 1, 3, "{}");
int64_t client =
cbm_gbuf_upsert_node(gb, "Function", "client", "test.client", "client.py", 1, 3, "{}");
/* Bad triple: same shape, but the route path is a filesystem path. */
int64_t bad_route =
cbm_gbuf_upsert_node(gb, "Route", "/etc/crio/crio.conf",
"__route__GET__/etc/crio/crio.conf", "", 0, 0, "{\"method\":\"GET\"}");
int64_t bad_handler = cbm_gbuf_upsert_node(gb, "Function", "bad_handler", "test.bad_handler",
"server.py", 5, 7, "{}");
int64_t bad_client = cbm_gbuf_upsert_node(gb, "Function", "bad_client", "test.bad_client",
"client.py", 5, 7, "{}");
ASSERT_GT(route, 0);
ASSERT_GT(handler, 0);
ASSERT_GT(client, 0);
ASSERT_GT(bad_route, 0);
ASSERT_GT(bad_handler, 0);
ASSERT_GT(bad_client, 0);

/* Wire handler -> route (HANDLES) and client -> route (HTTP_CALLS) for both
 * triples. */
cbm_gbuf_insert_edge(gb, handler, route, "HANDLES", "{\"handler\":\"test.get_orders\"}");
cbm_gbuf_insert_edge(gb, client, route, "HTTP_CALLS",
"{\"callee\":\"requests.get\",\"url_path\":\"/api/orders\","
"\"method\":\"GET\"}");
cbm_gbuf_insert_edge(gb, bad_handler, bad_route, "HANDLES",
"{\"handler\":\"test.bad_handler\"}");
cbm_gbuf_insert_edge(gb, bad_client, bad_route, "HTTP_CALLS",
"{\"callee\":\"requests.get\",\"url_path\":\"/etc/crio/crio.conf\","
"\"method\":\"GET\"}");

cbm_pipeline_create_route_nodes(gb);

/* Only the valid route produces a client -> handler data flow. */
ASSERT_TRUE(has_data_flow(gb, client, handler));
ASSERT_FALSE(has_data_flow(gb, bad_client, bad_handler));

cbm_gbuf_free(gb);
PASS();
}

/* Suite entry point: runs the HTTP route-literal guard tests defined in this
 * file. */
SUITE(infrascan) {
/* Route-literal guard tests; the other infrascan tests live in
 * test_pipeline.c's pipeline suite. */
RUN_TEST(infrascan_http_route_literal_guard_rejects_filesystem_paths);
RUN_TEST(infrascan_route_nodes_skip_bad_http_url_paths);
RUN_TEST(infrascan_http_calls_join_matching_handler_route);
}
Loading