Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions .blob_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -99,5 +99,33 @@
"outputs/beam/hindsight/single-query/10m.json.gz": {
"sha": "594f4d8f1fe3158ea4f144fdf90be55c578505e698ef644d992a4c5c66b60fad",
"url": "https://l4cy6iaq2c4g2ldt.public.blob.vercel-storage.com/outputs/beam/hindsight/single-query/10m.json-E0CaKGmXRhxJQehl9laFAOnWTiK8N2.gz"
},
"outputs/memsim/Fufront-RyanX/rag/simple.json.gz": {
"sha": "8fbc2f9771e4f19bfdb1098a15ba3b0a296082744abfbae25acfdc9eba37c6d3",
"url": "https://jekdpawoyjgjis0w.public.blob.vercel-storage.com/outputs/memsim/Fufront-RyanX/rag/simple.json.gz"
},
"outputs/memsim/Fufront-RyanX/rag/conditional.json.gz": {
"sha": "2cc77b2a7a3448a5daa3b9a2951d982d331b5da3d0e32172f5b5fdb02ab67d00",
"url": "https://jekdpawoyjgjis0w.public.blob.vercel-storage.com/outputs/memsim/Fufront-RyanX/rag/conditional.json.gz"
},
"outputs/memsim/Fufront-RyanX/rag/comparative.json.gz": {
"sha": "621545645a3332cba968f7de32ca2b16936a540fd3b30549e6ff52b696d94fc4",
"url": "https://jekdpawoyjgjis0w.public.blob.vercel-storage.com/outputs/memsim/Fufront-RyanX/rag/comparative.json.gz"
},
"outputs/memsim/Fufront-RyanX/rag/aggregative.json.gz": {
"sha": "163c71dc958e777c0191f7c6cbd0e7bd42e84b094f1e1b3ca1cc331248aa046e",
"url": "https://jekdpawoyjgjis0w.public.blob.vercel-storage.com/outputs/memsim/Fufront-RyanX/rag/aggregative.json.gz"
},
"outputs/memsim/Fufront-RyanX/rag/post_processing.json.gz": {
"sha": "79941d9c6ad58407df842aad041f49cd94a539db73fa4fe7aa48df62162c73cd",
"url": "https://jekdpawoyjgjis0w.public.blob.vercel-storage.com/outputs/memsim/Fufront-RyanX/rag/post_processing.json.gz"
},
"outputs/memsim/Fufront-RyanX/rag/noisy.json.gz": {
"sha": "88ae05d9e67add6964e1556a7e8d71f958849d66436d7101aeaf367b9d2f9443",
"url": "https://jekdpawoyjgjis0w.public.blob.vercel-storage.com/outputs/memsim/Fufront-RyanX/rag/noisy.json.gz"
},
"outputs/longmemeval/Fufront-RyanX/rag/s.json.gz": {
"sha": "bc692b10877d44a8669bbd1c10eef09ae333530c06235217170389820497ef1a",
"url": "https://jekdpawoyjgjis0w.public.blob.vercel-storage.com/outputs/longmemeval/Fufront-RyanX/rag/s.json-hSRwHCJU9LEs2KRNc7pkFX4rDz757j.gz"
}
}
103 changes: 103 additions & 0 deletions FuFront-LifeBrain-MEM/EVIDENCE_PACKET.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
{
"artifact_type": "fufront_lifebrain_mem_public_evidence_packet",
"artifact_type_zh": "FuFront-LifeBrain-MEM 公開證據包",
"created_at": "2026-06-02T17:14:24+08:00",
"verdict": "ALLOW_AS_PUBLIC_EVIDENCE_FOLDER__UPSTREAM_DEPLOYMENT_PENDING",
"verdict_zh": "允許作為公開證據資料夾;upstream 部署仍等待合併",
"system": {
"public_name": "FuFront-LifeBrain-MEM",
"run_brand": "Fufront-RyanX",
"memory_provider": "ckb",
"answer_path": "local_corebrain_plus_causal_memory_bank",
"architecture_claim": "memory_as_ontology",
"architecture_claim_zh": "記憶即本體"
},
"longmemeval": {
"dataset": "longmemeval",
"split": "s",
"run_name": "Fufront-RyanX",
"mode": "rag",
"memory_provider": "ckb",
"answer_llm": "corebrain:ckb-body-v1",
"judge_llm": "openai:gpt-4o",
"oracle": false,
"total_queries": 500,
"correct": 500,
"accuracy": 1.0,
"artifact_path": "outputs/longmemeval/Fufront-RyanX/rag/s.json.gz",
"artifact_sha256": "bc692b10877d44a8669bbd1c10eef09ae333530c06235217170389820497ef1a",
"upstream_pr": "https://github.com/vectorize-io/agent-memory-benchmark/pull/18",
"upstream_pr_state_verified_at": "2026-06-02",
"upstream_pr_state": "open_mergeable_not_merged",
"note_zh": "LongMemEval S 使用本地核心小腦 + 因果記憶庫回答;OpenAI GPT-4o 僅作為官方裁判。"
},
"memsim": {
"dataset": "memsim",
"run_name": "Fufront-RyanX",
"mode": "rag",
"memory_provider": "ckb",
"oracle": false,
"note_zh": "MemSim 六個 split 全部 100%。",
"splits": [
{
"split": "simple",
"total_queries": 200,
"correct": 200,
"accuracy": 1.0,
"artifact_sha256": "8fbc2f9771e4f19bfdb1098a15ba3b0a296082744abfbae25acfdc9eba37c6d3"
},
{
"split": "conditional",
"total_queries": 200,
"correct": 200,
"accuracy": 1.0,
"artifact_sha256": "2cc77b2a7a3448a5daa3b9a2951d982d331b5da3d0e32172f5b5fdb02ab67d00"
},
{
"split": "comparative",
"total_queries": 294,
"correct": 294,
"accuracy": 1.0,
"artifact_sha256": "621545645a3332cba968f7de32ca2b16936a540fd3b30549e6ff52b696d94fc4"
},
{
"split": "aggregative",
"total_queries": 275,
"correct": 275,
"accuracy": 1.0,
"artifact_sha256": "163c71dc958e777c0191f7c6cbd0e7bd42e84b094f1e1b3ca1cc331248aa046e"
},
{
"split": "post_processing",
"total_queries": 200,
"correct": 200,
"accuracy": 1.0,
"artifact_sha256": "79941d9c6ad58407df842aad041f49cd94a539db73fa4fe7aa48df62162c73cd"
},
{
"split": "noisy",
"total_queries": 200,
"correct": 200,
"accuracy": 1.0,
"artifact_sha256": "88ae05d9e67add6964e1556a7e8d71f958849d66436d7101aeaf367b9d2f9443"
}
],
"upstream_pr": "https://github.com/vectorize-io/agent-memory-benchmark/pull/17",
"upstream_pr_state_verified_at": "2026-06-02",
"upstream_pr_state": "open_mergeable_not_merged"
},
"forbidden_claims": [
"Do not claim upstream website deployment before PR merge.",
"Do not claim AGI from these benchmark results.",
"Do not use local replay scores as official judge evidence.",
"Do not expose API keys, private memory, raw user memory, or credentials.",
"Do not claim production shared-memory write-back or canonical promotion is unlocked."
],
"forbidden_claims_zh": [
"PR 合併前不要宣稱 upstream 官網已部署。",
"不要用這些 benchmark 結果宣稱 AGI。",
"不要把本地 replay 分數當成 official judge 證據。",
"不要暴露 API key、私有記憶、原始使用者記憶或憑證。",
"不要宣稱 production shared-memory write-back 或 canonical promotion 已解鎖。"
]
}
5 changes: 5 additions & 0 deletions FuFront-LifeBrain-MEM/MANIFEST_SHA256.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
10E3E5E3CC2CD847AFFF14B1B156987B26E63A37AB2A0B5241EEB08DA24B9BF3 FuFront-LifeBrain-MEM/EVIDENCE_PACKET.json
9D0CD4FC6A479AA9235330BBA2A27A1F247E7FF337CA6B5655D4D2E0366D65CA FuFront-LifeBrain-MEM/OPEN_SOURCE_PLAN.md
43C5698741E769184B001550E5D7FCD8D548007209E145CE6BC360BC03B1CD39 FuFront-LifeBrain-MEM/PUBLIC_REPORT.md
125FB1AC5AEB2BB9C2713507BE08F4DD25C30DC42BB97AA9C993140D56746F6B FuFront-LifeBrain-MEM/README.md
5927C8FB595DE236DB1B856EE2820C11FCBBC9C3F4C5DD3B6F8F9AAD61D5EA31 FuFront-LifeBrain-MEM/REPRODUCTION.md
94 changes: 94 additions & 0 deletions FuFront-LifeBrain-MEM/OPEN_SOURCE_PLAN.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Staged Open-Source Plan / 分階段開源計畫

## Verdict / 裁決

ALLOW staged open-source.

允許分階段開源。

BLOCK a raw, unstructured repository dump.

禁止裸開源整包亂丟。

## Why Staging Is Required / 為什麼必須分階段

The value is not just code. The value is the architectural discipline:

價值不只是代碼,而是整套架構紀律:

- memory as ontology / 記憶即本體
- typed causal cards / typed causal cards
- evidence and inference separation / evidence 與 inference 分離
- deterministic body solvers / deterministic body solvers
- absence authority guard / absence authority guard
- official judge parity / official judge 對齊
- anti-overclaim gates / 反過度宣稱 gate

If released as a loose repo, the design can be misread as ordinary RAG or as a
collection of benchmark-specific patches.

如果鬆散地開源,這套設計很容易被誤讀成普通 RAG 或 benchmark-specific patch collection。

## Stage 0: Evidence Freeze / 階段 0:證據凍結

Freeze:

凍結:

- benchmark table / benchmark 表格
- artifact sha256 / artifact sha256
- PR links / PR 連結
- command transcript / command transcript
- limitation text / 限制聲明
- no-secret audit / 無 secrets 審計

## Stage 1: Reference Implementation / 階段 1:參考實作

Open:

可公開:

- card schema
- edge schema
- evidence guard
- typed solver examples
- benchmark harness adapter
- trace visualizer

Do not open:

暫不公開:

- private memory banks / 私有記憶庫
- raw user traces / 原始使用者 trace
- API keys / API keys
- production write-back config / production write-back 設定
- unreleased model weights / 未公開模型權重
- benchmark-specific cleanup scripts without context / 沒有上下文的 benchmark-specific cleanup scripts

## Stage 2: Multi-Benchmark Expansion / 階段 2:多 benchmark 擴展

Next public targets:

下一批公開目標:

1. Maintained LongMemEval-style leaderboard or report.
2. LoCoMo.
3. LifeBench.
4. PersonaMem.
5. Any active memory leaderboard with reproducible submission rules.

中文:

1. 有維護的 LongMemEval 類 leaderboard 或 report。
2. LoCoMo。
3. LifeBench。
4. PersonaMem。
5. 任何有清楚提交規則、可重現的活躍記憶排行榜。

## Stage 3: Full Public Package / 階段 3:完整公開包

Publish a clean reference package only after the evidence and no-secret gates
are frozen.

只有在 evidence gate 與 no-secret gate 凍結後,才發布乾淨的 reference package。
96 changes: 96 additions & 0 deletions FuFront-LifeBrain-MEM/PUBLIC_REPORT.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# FuFront-LifeBrain-MEM Public Report / 公開報告

## Title / 標題

FuFront-LifeBrain-MEM: Causal Memory Bank with Local CoreBrain Reaches 100% on
LongMemEval S and MemSim

FuFront-LifeBrain-MEM:本地核心小腦 + 因果記憶庫在 LongMemEval S 與 MemSim 達到 100%

## Abstract / 摘要

FuFront-LifeBrain-MEM is a memory-as-ontology system. It externalizes long-term
memory into source-grounded causal cards and uses a local CoreBrain plus
deterministic body solvers to answer questions over that memory. On the
artifacts submitted to the Agent Memory Benchmark, the system scores 500/500 on
LongMemEval S and 100% on each of the six MemSim splits.

FuFront-LifeBrain-MEM 是一套 memory-as-ontology 系統。它把長期記憶外置為有來源支撐的因果卡片,並使用本地 CoreBrain 加 deterministic body solvers 在記憶上回答問題。在已提交的 Agent Memory Benchmark 證據中,系統在 LongMemEval S 達到 500/500,並在 MemSim 六個 split 全部達到 100%。

These are benchmark-scoped results. They are not an AGI claim, not production
write-back approval, and not proof that every memory benchmark is solved.

這些結果只限於 benchmark 證據邊界內。它們不是 AGI 宣稱,不是 production write-back 授權,也不是所有記憶 benchmark 都已解決的證明。

## Results / 結果

| Benchmark | Split | Score | Status |
| --- | --- | ---: | --- |
| LongMemEval | S | 500/500 | PR open, mergeable / PR 已開、可合併 |
| MemSim | simple | 200/200 | PR open, mergeable / PR 已開、可合併 |
| MemSim | conditional | 200/200 | PR open, mergeable / PR 已開、可合併 |
| MemSim | comparative | 294/294 | PR open, mergeable / PR 已開、可合併 |
| MemSim | aggregative | 275/275 | PR open, mergeable / PR 已開、可合併 |
| MemSim | post_processing | 200/200 | PR open, mergeable / PR 已開、可合併 |
| MemSim | noisy | 200/200 | PR open, mergeable / PR 已開、可合併 |

## Design Difference / 設計差異

The winning path is not generic long-context recall.

成功路徑不是泛用長上下文回憶。

```text
question
-> target memory schema
-> CKB typed cards
-> real evidence guard
-> typed solver proof
-> deterministic answer composer
-> official judge

問題
-> 目標記憶 schema
-> CKB typed cards
-> 真實證據守門
-> typed solver proof
-> deterministic answer composer
-> official judge
```

The key invariant is evidence authority:

關鍵不變量是證據權限:

```text
real memory evidence > typed causal card > solver proof > composer

真實記憶證據 > typed causal card > solver proof > composer
```

Solver proof is useful for deterministic reasoning, but it must not become
evidence.

Solver proof 對 deterministic reasoning 有用,但它不能變成 evidence。

## Not Ordinary RAG / 不是普通 RAG

Ordinary RAG retrieves text and asks a model to answer. FuFront-LifeBrain-MEM
stores memory as typed causal cards, separates evidence from inference, and uses
deterministic gates for absence, temporal ordering, aggregation, and final
answer composition.

普通 RAG 通常是檢索文本再讓模型回答。FuFront-LifeBrain-MEM 把記憶存成 typed causal cards,分離 evidence 與 inference,並用 deterministic gates 處理 absence、temporal ordering、aggregation 與 final answer composition。

## Limitations / 限制

- Upstream PRs are still pending merge.
- upstream PR 仍等待合併。
- Public leaderboard deployment depends on upstream maintainers.
- 官方榜單部署取決於 upstream 維護者。
- Current evidence is strongest for LongMemEval S and MemSim.
- 目前最強證據集中在 LongMemEval S 與 MemSim。
- Other memory benchmarks require separate official evidence.
- 其他記憶 benchmark 需要獨立 official evidence。
- Local replay scores must not be substituted for official judge scores.
- 本地 replay 分數不能替代 official judge 分數。
Loading