-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathschema.sql
More file actions
308 lines (286 loc) · 9.52 KB
/
schema.sql
File metadata and controls
308 lines (286 loc) · 9.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
/*
Table Name: access_credential
Owner: core
Description: Credentials used for remote private repositories, currently not implemented.
*/
/*
Table Name: analysis_run_detail
Owner: core
Description: Item of analysis_run_header, represents a singular step of analysis.
*/
/*
Table Name: analysis_run_header
Owner: core
Description: Represents a whole analysis, aggregates common fields of the analysis_run_detail.
*/
/*
Table Name: branches
Owner: core
Description: Persisted branches of a repository.
*/
/*
Table Name: datasets_v1
Owner: miner
Description: Persisted data in df_pred v1 format.
*/
/*
Table Name: files
Owner: miner
Description: Maps the file name with an index, not used at the moment.
*/
/*
Table Name: impact_user
Owner: core
Description: User of the application, named this way to avoid conflicts/confusion with the user table of Postgres.
*/
/*
Table Name: jobs
Owner: miner
Description: Persisted single call of run function.
*/
/*
Table Name: project
Owner: core
Description: Collection of one or more repositories, can be associated with one or more tenants.
*/
/*
Table Name: project_repository
Owner: core
Description: Many-to-Many table to associate projects to repositories.
*/
/*
Table Name: projects
Owner: miner
Description: Persisted collection of repo_miner.
*/
/*
Table Name: repo_miners
Owner: miner
Description: Persisted options for the mine.
*/
/*
Table Name: repository
Owner: core
Description: Persisted git repositories.
*/
/*
Table Name: shared_states
Owner: miner
Description: Persisted state for resuming mining.
*/
/*
Table Name: tenant
Owner: core
Description: An organization.
*/
/*
Table Name: tenant_project
Owner: core
Description: Many-to-Many table to associate projects to tenants.
*/
/*
Table Name: tenant_user
Owner: core
Description: Many-to-Many table to associate users to tenants.
*/
/*
Table Name: trained_model
Owner: prediction
Description: Model used to perform predictions.
*/
/*
Table Name: trained_model_branch
Owner: prediction
Description: Many-to-Many table to associate branches with trained models.
*/
/*
Table Name: training_run
Owner: prediction
Description: Instance of a training.
*/
/*
Table Name: diffs_v1
Owner: miner
Description: Persists data in diffs format, version 1.
*/
/*
Table Name: datasets_v2
Owner: miner
Description: Persisted data in df_pred v2 format; extends datasets_v1 with the child commit timestamp and SHA.
*/
-- Application users. Named impact_user to avoid clashing with Postgres' own "user".
create table if not exists public.impact_user (
    id       bigserial primary key,
    username varchar(255) not null unique,  -- login name, globally unique
    password varchar(255) not null,         -- password hash (the seed row below stores a bcrypt $2a$ digest)
    email    varchar(255) not null unique
);
-- An organization; tenants group users and projects.
create table if not exists public.tenant (
    id          bigserial primary key,
    name        varchar(255) not null,
    description text,
    -- Tenant creator. Deliberately no ON DELETE action: deleting a user who
    -- owns a tenant is rejected rather than silently orphaning the tenant.
    owner       bigint not null constraint fk_owner references public.impact_user,
    -- NOTE(review): presumably an invitation code — confirm against callers.
    -- The original "default NULL :: character varying" was pg_dump noise:
    -- NULL is already the default for a nullable column, so it is dropped.
    join_code   varchar(100)
);
-- Many-to-many: users <-> tenants.
-- Fixed for consistency with the sibling junction tables (project_repository,
-- tenant_project): tenant_id was nullable, there was no primary key (allowing
-- duplicate and orphan membership rows), and deleting a tenant left dangling
-- memberships because the tenant FK had no cascade.
create table if not exists public.tenant_user (
    user_id   bigint not null constraint fk_user references public.impact_user on delete cascade,
    tenant_id bigint not null constraint fk_tenant references public.tenant on delete cascade,
    primary key (user_id, tenant_id)
);
-- Collection of one or more repositories; owned by a single user.
create table if not exists public.project (
    id          bigserial primary key,
    name        varchar(255) not null,
    description text,
    owner_id    bigint not null constraint fk_owner_id references public.impact_user
);
-- Credentials for remote private repositories (not yet implemented per the header docs).
create table if not exists public.access_credential (
    id               bigserial primary key,
    name             varchar(255) not null,
    description      text,
    provider         varchar(255) not null,  -- values not constrained by the schema
    vcs_type         varchar(255) not null,
    credentials_json json not null,          -- NOTE(review): secrets stored as plain json — confirm handling
    project_id       bigint not null constraint fk_project_id references public.project
);
-- A git repository; may be remote (remote_url) and/or mounted locally (volume_path).
create table if not exists public.repository (
    id            bigserial primary key,
    name          varchar(255) not null,
    org           varchar(255),
    remote_url    varchar(255),
    volume_path   varchar(255),
    -- Deleting a credential must not delete the repository; the link is cleared.
    credential_id bigint constraint fk_credential_id references public.access_credential on delete set null,
    project_id    bigint not null constraint fk_project_id references public.project
);
-- Many-to-many: projects <-> repositories.
create table if not exists public.project_repository (
    project_id    bigint not null constraint fk_project_id references public.project on delete cascade,
    repository_id bigint not null constraint fk_repository_id references public.repository on delete cascade,
    primary key (project_id, repository_id)
);
-- Many-to-many: tenants <-> projects.
create table if not exists public.tenant_project (
    tenant_id  bigint not null constraint fk_tenant_id references public.tenant on delete cascade,
    project_id bigint not null constraint fk_project_id references public.project on delete cascade,
    primary key (tenant_id, project_id)
);
-- Persisted branches of a repository.
create table if not exists public.branches (
    id            bigserial primary key,
    branch        varchar not null,  -- branch name (unbounded)
    repository_id bigint not null constraint fk_repository_id references public.repository on delete cascade
);
-- One whole analysis; aggregates the common fields of its analysis_run_detail rows.
create table if not exists public.analysis_run_header (
    id            bigserial not null constraint analysis_run_pkey primary key,
    creation_date timestamp default now(),
    created_by    bigint not null constraint fk_user_id references public.impact_user on delete cascade,
    project_id    bigint not null constraint fk_project_id references public.project on delete cascade,
    error         boolean default false not null,  -- set when any step of the run fails
    analysis_type varchar default 'df_preds'::character varying not null
);
-- Model used to perform predictions; linked back to the analysis that produced it.
-- Fixed: this was the only table created without "if not exists", which made
-- re-running this schema file fail; the FK target is now schema-qualified
-- (public.analysis_run_header) like every other reference in the file.
create table if not exists public.trained_model (
    id              bigserial primary key,
    analysis_header bigint constraint analysis_header___fk references public.analysis_run_header,
    model_type      varchar default 'RandomForestClassifier'::character varying not null
);
-- A single training instance for a trained model.
create table if not exists public.training_run (
    id                   bigserial primary key,
    start_date           timestamp not null,
    status               varchar(255) not null,
    last_analysed_commit varchar(50),  -- presumably a commit SHA; length fits hex SHA-1/SHA-256
    last_analysed_window integer,
    duration             bigint,       -- NOTE(review): units (ms? s?) not stated — confirm
    trained_model_id     bigint not null constraint fk_trained_model_id references public.trained_model on delete cascade,
    end_date             timestamp     -- null while the run is still in progress
);
-- Many-to-many: branches <-> trained models.
create table if not exists public.trained_model_branch (
    -- Surrogate key; note (branch_id, trained_model_id) is NOT enforced unique here.
    id               bigserial primary key,
    branch_id        bigint not null constraint fk_branch_id references public.branches on delete cascade,
    trained_model_id bigint not null constraint fk_trained_model_id references public.trained_model on delete cascade
);
-- Persisted options for the miner; one row per mined project.
create table if not exists public.repo_miners (
    id           serial primary key,
    project_id   varchar(255) not null unique,  -- referenced by files/jobs/datasets_* via this unique key
    start_mining timestamp,
    end_mining   timestamp,
    duration     bigint,
    status       varchar(255),
    opts         bytea  -- serialized miner options (opaque blob)
);
-- Miner-owned project collection (distinct from core's "project" table).
create table if not exists public.projects (
    id     text not null primary key,
    name   text not null,
    branch text not null,
    source text not null,
    state  bytea  -- serialized resumable state (opaque blob)
);
-- Maps a file name to an index (currently unused per the header docs).
create table if not exists public.files (
    id         serial primary key,
    value      text not null,  -- the file name
    project_id text references public.repo_miners (project_id),
    unique (value, project_id)  -- a file name may repeat only across projects
);
-- One persisted invocation of the miner's run function.
create table if not exists public.jobs (
    id         serial primary key,
    project_id text references public.repo_miners (project_id),
    -- Job lifecycle; only these five states are allowed.
    status     text not null constraint jobs_status_check check (
        status = any (
            ARRAY ['initialized'::text, 'running'::text, 'completed'::text, 'failed'::text, 'up_to_date'::text]
        )
    ),
    start_date timestamp not null,
    end_date   timestamp  -- null until the job finishes
);
-- df_pred v1 rows: one co-change observation per file pair and window.
create table if not exists public.datasets_v1 (
    -- To use timescale this primary key must be removed; at the moment doing so breaks the training step.
    id          serial primary key,
    f1          text not null,
    f2          text not null,
    is_cochange integer not null constraint datasets_v1_is_cochange_check check (is_cochange = any (ARRAY [0, 1])),  -- boolean encoded as 0/1
    project_id  text not null references public.repo_miners (project_id),
    windex      integer not null constraint datasets_v1_windex_check check (windex > 0),  -- window index, strictly positive in v1
    recency     integer constraint datasets_v1_recency_check check (recency > 0),
    author      text not null
);
-- df_pred v2 rows: like datasets_v1 plus the child commit timestamp and SHA;
-- v2 also relaxes windex/recency checks to allow zero.
create table if not exists public.datasets_v2 (
    -- To use timescale this primary key must be removed; at the moment doing so breaks the training step.
    id          serial primary key,
    f1          text not null,
    f2          text not null,
    is_cochange integer not null constraint datasets_v2_is_cochange_check check (is_cochange = any (ARRAY [0, 1])),  -- boolean encoded as 0/1
    project_id  text not null references public.repo_miners (project_id),
    windex      integer not null constraint datasets_v2_windex_check check (windex >= 0),  -- window index, zero allowed (unlike v1)
    recency     integer constraint datasets_v2_recency_check check (recency >= 0),
    author      text not null,
    child_when  timestamp not null,  -- commit timestamp of the child commit
    child_sha   text not null
);
-- Serialized miner state, one row per project, for resuming interrupted mining.
create table if not exists public.shared_states (
    id         serial primary key,
    project_id text not null unique,
    data       bytea not null  -- opaque serialized state
);
-- A single step of an analysis; child of analysis_run_header.
create table if not exists public.analysis_run_detail (
    id         bigserial constraint analysis_run_detail_pk primary key,
    type       varchar not null,  -- step kind; values not constrained by the schema
    status     varchar not null,
    start_date timestamp,
    end_date   timestamp,
    header     bigint not null constraint analysis_run_header___fk references public.analysis_run_header,
    options    jsonb  -- step-specific options
);
-- Persists data in diffs format, version 1: one row per changed file per commit pair.
-- Fixed: now created idempotently ("if not exists") and schema-qualified
-- (public.), consistent with every other table in this file; keywords
-- lowercased to match the file's convention.
create table if not exists public.diffs_v1 (
    id         serial primary key,
    -- FK to repo_miners(project_id) deliberately left disabled (original commented-out reference kept).
    project_id text not null, --references public.repo_miners (project_id),
    parent_sha text not null,
    child_sha  text not null,
    old_file   text not null,
    new_file   text not null,
    old_lines  integer not null,
    new_lines  integer not null,
    old_author text not null,
    new_author text not null,
    kind       varchar(10) not null,  -- change kind; values not constrained by the schema
    "when"     timestamp not null     -- quoted: WHEN is a reserved word
);
-- SELECT create_hypertable('public.datasets_v1', 'windex', chunk_time_interval => 1);
-- Seed the default development user (password column holds a bcrypt hash).
-- Fixed: the original insert failed on every re-run of this file (duplicate
-- primary key / unique username), even though all DDL above is idempotent
-- via "if not exists". It also left the bigserial sequence at 1, so the
-- first generated id would collide with this explicit id. The insert now
-- tolerates re-runs and the sequence is advanced past the seeded id.
insert into
    public.impact_user (id, username, password, email)
values
    (
        1,
        'dev',
        '$2a$10$D3HgetPAUcKQosjmeP8C3OsvV65xVwgZEYlj6JmMmsz.Do.1uiC3q',
        ''
    ) on conflict do nothing;
select setval(
    pg_get_serial_sequence('public.impact_user', 'id'),
    (select max(id) from public.impact_user)
);