-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathembedder.js
More file actions
102 lines (80 loc) · 2.97 KB
/
Copy pathembedder.js
File metadata and controls
102 lines (80 loc) · 2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
// Voyage AI API key used for the `Authorization: Bearer …` header.
// Read from the environment so the secret never lives in source control.
// SECURITY: a literal key used to be committed on this line; treat that key
// as compromised and revoke/rotate it in the Voyage dashboard.
const VOYAGE_API_KEY = process.env.VOYAGE_API_KEY;
/**
 * Embeds text chunks through the Voyage AI embeddings endpoint, sending them
 * in sequential batches.
 *
 * @param {string[]} chunks - Texts to embed. An empty array yields an empty result
 *   without any request being made.
 * @param {object} [options]
 * @param {number} [options.batchSize=20] - How many chunks go into one request.
 * @param {string} [options.model="voyage-3"] - Model name sent in the request body.
 *   Vectors are only comparable with vectors produced by the same model.
 * @returns {Promise<Array<{text: string, vector: number[]}>>} One entry per chunk,
 *   in input order.
 * @throws {TypeError} If `chunks` is not an array.
 * @throws {RangeError} If `batchSize` is not a positive integer.
 * @throws {Error} If a request fails or the response is malformed; the message
 *   always starts with "Voyage API failed".
 */
const embedChunks = async (chunks, { batchSize = 20, model = "voyage-3" } = {}) => {
  if (!Array.isArray(chunks)) {
    throw new TypeError("embedChunks expects an array of strings");
  }
  // A non-positive step would make the batching loop spin forever.
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError("batchSize must be a positive integer");
  }

  const allEmbedded = [];

  for (let i = 0; i < chunks.length; i += batchSize) {
    const batch = chunks.slice(i, i + batchSize);
    console.log("hitting voyage api with batch size:", batch.length);

    const response = await fetch("https://api.voyageai.com/v1/embeddings", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${VOYAGE_API_KEY}`,
      },
      body: JSON.stringify({
        input: batch,
        model,
      }),
    });

    // Error responses are not guaranteed to carry a JSON body, so a parse
    // failure is reported with the HTTP status instead of a bare SyntaxError.
    let payload;
    try {
      payload = await response.json();
    } catch (err) {
      throw new Error(
        `Voyage API failed: HTTP ${response.status}, body was not valid JSON`,
        { cause: err },
      );
    }

    const items = payload?.data;
    if (!response.ok || !Array.isArray(items)) {
      console.error("Voyage error:", payload);
      throw new Error(`Voyage API failed (HTTP ${response.status})`);
    }

    // Each returned item is expected to carry an `index` pointing back at its
    // position in the request's `input`; use it when present and fall back to
    // array position otherwise, so texts and vectors can never be mispaired.
    const vectors = new Array(batch.length);
    items.forEach((item, position) => {
      const slot = Number.isInteger(item?.index) ? item.index : position;
      vectors[slot] = item?.embedding;
    });

    batch.forEach((text, j) => {
      if (!Array.isArray(vectors[j])) {
        throw new Error(`Voyage API failed: no embedding returned for chunk ${i + j}`);
      }
      allEmbedded.push({ text, vector: vectors[j] });
    });

    console.log(`✅ Batch ${Math.floor(i / batchSize) + 1} done`);
  }

  console.log(`✅ Total embedded: ${allEmbedded.length} chunks`);
  return allEmbedded;
};
/**
 * Embeds a single query string through the Voyage AI embeddings endpoint.
 *
 * @param {string} query - The query text to embed.
 * @param {object} [options]
 * @param {string} [options.model="voyage-3"] - Model name sent in the request body.
 *   Use the same model that produced the vectors the query is compared against.
 * @returns {Promise<number[]>} The embedding of `query`.
 * @throws {TypeError} If `query` is not a string.
 * @throws {Error} If the request fails or the response holds no embedding; the
 *   message always starts with "Voyage API failed".
 */
const embedQuery = async (query, { model = "voyage-3" } = {}) => {
  if (typeof query !== "string") {
    throw new TypeError("embedQuery expects a string");
  }

  const response = await fetch("https://api.voyageai.com/v1/embeddings", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${VOYAGE_API_KEY}`,
    },
    body: JSON.stringify({
      input: [query],
      model,
    }),
  });

  // Error responses are not guaranteed to carry a JSON body, so a parse
  // failure is reported with the HTTP status instead of a bare SyntaxError.
  let payload;
  try {
    payload = await response.json();
  } catch (err) {
    throw new Error(
      `Voyage API failed: HTTP ${response.status}, body was not valid JSON`,
      { cause: err },
    );
  }

  if (!response.ok || !Array.isArray(payload?.data)) {
    console.error("Voyage error:", payload);
    throw new Error(`Voyage API failed (HTTP ${response.status})`);
  }

  // Exactly one input was sent, so the first item is the query's embedding.
  const embedding = payload.data[0]?.embedding;
  if (!Array.isArray(embedding)) {
    console.error("Voyage error:", payload);
    throw new Error("Voyage API failed: response contained no embedding");
  }

  console.log("✅ Query embedded");
  return embedding;
};
// ── TEST ──────────────────────────────────────
/**
 * Manual smoke test: embeds a handful of sample sentences with `embedChunks`,
 * then one question with `embedQuery`, printing a short summary of each result.
 *
 * @returns {Promise<void>} Resolves once both summaries have been printed.
 */
async function test() {
  console.log("\n========== TESTING embedChunks ==========");
  const sampleTexts = [
    "RAG stands for Retrieval Augmented Generation.",
    "It helps LLMs access external knowledge.",
    "RAG reduces hallucinations significantly.",
    "Pinecone is a vector database.",
    "Voyage AI provides embedding models.",
  ];
  const results = await embedChunks(sampleTexts);

  console.log("\n--- embedChunks Results ---");
  // Values are read lazily, one row at a time, so each line is printed
  // before the next value is looked up.
  const chunkReport = [
    ["Total chunks embedded :", () => results.length],
    ["First chunk text :", () => results[0].text],
    ["Vector dimensions :", () => results[0].vector.length],
    ["First 5 vector values :", () => results[0].vector.slice(0, 5)],
  ];
  for (const [label, read] of chunkReport) {
    console.log(label, read());
  }

  console.log("\n========== TESTING embedQuery ==========");
  const question = "What is RAG?";
  const questionVector = await embedQuery(question);

  console.log("\n--- embedQuery Results ---");
  const queryReport = [
    ["Query :", () => question],
    ["Vector dimensions :", () => questionVector.length],
    ["First 5 values :", () => questionVector.slice(0, 5)],
  ];
  for (const [label, read] of queryReport) {
    console.log(label, read());
  }
}
// Run the smoke test when this file is executed. A failure is logged and also
// reflected in the exit status, so shells and CI can tell the run did not pass.
test().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});