How to Build Memory-Driven AI Agents with Short-Term, Long-Term, and Episodic Memory


def openai_chat(system: str, user: str) -> str:
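   """Call the chat model set up earlier; assumes client and OPENAI_MODEL are defined above."""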
   resp = client.chat.completions.create(
       model=OPENAI_MODEL,
       messages=[
           {"role": "system", "content": system},
           {"role": "user", "content": user},
       ],
       temperature=0.3
   )
   return resp.choices[0].message.content


def heuristic_responder(context: str, question: str) -> str:
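   """Offline fallback: turn the retrieved memory context into a structured answer without an LLM."""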
   lessons = re.findall(r"Lessons=(.*)", context)
   avoid = re.findall(r"Avoid=(.*)", context)
   ltm_lines = [ln for ln in context.splitlines() if ln.startswith("[LTM:")]


   steps = []
   if lessons:
       for chunk in lessons[:2]:
           for s in [x.strip() for x in chunk.split(";") if x.strip()]:
               steps.append(s)
   for ln in ltm_lines:
       if "[ltm:procedure]" in ln.lower():  # case-insensitive tag match
           proc = re.sub(r"^\[LTM:procedure\]\s*", "", ln, flags=re.I)
           proc = proc.split("(salience=")[0].strip()
           for part in [p.strip() for p in proc.split("|") if p.strip()]:
               steps.append(part)


   steps = steps[:8] if steps else [
       "Clarify the target outcome and constraints.",
       "Use semantic recall + episodic lessons to propose a plan.",
       "Execute, then store lessons learned.",
   ]


   pitfalls = []
   if avoid:
       for chunk in avoid[:2]:
           for s in [x.strip() for x in chunk.split(";") if x.strip()]:
               pitfalls.append(s)
   pitfalls = pitfalls[:6]


   prefs = [ln for ln in ltm_lines if "[ltm:preference]" in ln.lower()]
   facts = [ln for ln in ltm_lines if "[ltm:fact]" in ln.lower() or "[ltm:constraint]" in ln.lower()]


   out = []
   out.append("Answer (memory-informed, offline fallback)\n")
   if prefs:
       out.append("Relevant preferences/constraints remembered:")
       for ln in (prefs + facts)[:6]:
           out.append(" - " + ln.split("] ",1)[1].split(" (salience=")[0].strip())
       out.append("")
   out.append("Recommended approach:")
   for i, s in enumerate(steps, 1):
       out.append(f" {i}. {s}")
   if pitfalls:
       out.append("\nPitfalls to avoid (from episodic traces):")
       for p in pitfalls:
           out.append(" - " + p)
   out.append("\n(If you add an API key, the same memory context will feed a stronger LLM for higher-quality responses.)")
   return "\n".join(out).strip()
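
To sanity-check the fallback on its own, you can feed it a hand-written context string in the same [LTM:kind] / Lessons= / Avoid= format it parses. The content below is made up purely for illustration:

demo_ctx = (
    "[LTM:preference] Prefer concise, structured answers (salience=0.90)\n"
    "[LTM:procedure] Embed items | Rank by similarity | Decay overused memories (salience=0.80)\n"
    "Lessons=Pin hard constraints; Distill lessons after each task\n"
    "Avoid=Storing raw chat logs as long-term memory"
)
print(heuristic_responder(demo_ctx, "How should I store agent memories?"))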


class MemoryAugmentedAgent:
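   """Wraps MemoryEngine: retrieve -> build context -> answer (LLM or offline) -> update short-term memory."""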
   def __init__(self, mem: MemoryEngine):
       self.mem = mem


   def answer(self, question: str) -> Dict[str, Any]:
       pack = self.mem.retrieve(question)
       context = self.mem.build_context(question, pack)


       system = (
           "You are a memory-augmented agent. Use the provided memory context.\n"
           "Prioritize:\n"
           "1) Episodic lessons (what worked before)\n"
           "2) Long-term facts/preferences/procedures\n"
           "3) Short-term conversation state\n"
           "Be concrete and stepwise. If memory conflicts, state the uncertainty."
       )


       if USE_OPENAI:
           reply = openai_chat(system=system, user=context + "\n\nUser question:\n" + question)
       else:
           reply = heuristic_responder(context=context, question=question)


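       # Write both turns back into short-term memory so the next call sees this exchange.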
       self.mem.st_add("user", question, kind="message")
       self.mem.st_add("assistant", reply, kind="message")


       return {"reply": reply, "pack": pack, "context": context}


mem = MemoryEngine()
agent = MemoryAugmentedAgent(mem)


mem.ltm_add(kind="preference", text="Prefer concise, structured answers with steps and bullet points when helpful.", tags=["style"], pinned=True)
mem.ltm_add(kind="preference", text="Prefer solutions that run on Google Colab without extra setup.", tags=["environment"], pinned=True)
mem.ltm_add(kind="procedure", text="When building agent memory: embed items, store with salience/novelty policy, retrieve with hybrid semantic+episodic, and decay overuse to avoid repetition.", tags=["agent-memory"])
mem.ltm_add(kind="constraint", text="If no API key is available, provide a runnable offline fallback instead of failing.", tags=["robustness"], pinned=True)
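
The procedure entry above packs the whole storage policy into one line. Here is a minimal standalone sketch of the store-time novelty gate it refers to; this is illustrative only (cosine, MIN_NOVELTY, and should_store are made-up names, and the MemoryEngine defined earlier has its own implementation):

import numpy as np

MIN_NOVELTY = 0.15  # assumed threshold; tune for your corpus

def cosine(a, b):
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-9))

def should_store(candidate_vec, existing_vecs):
    # novelty = 1 - max similarity to anything already stored;
    # near-duplicates are skipped so the recall context stays diverse
    if not existing_vecs:
        return True
    novelty = 1.0 - max(cosine(candidate_vec, v) for v in existing_vecs)
    return novelty >= MIN_NOVELTY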


mem.episode_add(
   task="Build an agent memory layer for troubleshooting Python errors in Colab",
   constraints={"offline_ok": True, "single_notebook": True},
   plan=[
       "Capture short-term chat context",
       "Store durable constraints/preferences in long-term vector memory",
       "After solving, extract lessons into episodic traces",
       "On new tasks, retrieve top episodic lessons + semantic facts"
   ],
   actions=[
       {"type":"analysis", "detail":"Identified recurring failure: missing installs and version mismatches."},
       {"type":"action", "detail":"Added pip install block + minimal fallbacks."},
       {"type":"action", "detail":"Added memory policy: pin constraints, drop low-salience items."}
   ],
   result="Notebook became robust: runs with or without external keys; troubleshooting quality improved with episodic lessons.",
   outcome_score=0.90,
   lessons=[
       "Always include a pip install cell for non-standard deps.",
       "Pin hard constraints (e.g., offline fallback) into long-term memory.",
       "Store a post-task 'lesson list' as an episodic trace for reuse."
   ],
   failure_modes=[
       "Assuming an API key exists and crashing when absent.",
       "Storing too much noise into long-term memory causing irrelevant recall context."
   ],
   tags=["colab","robustness","memory"]
)


print("✅ Memory engine initialized.")
print(f"   LTM items: {len(mem.ltm)} | Episodes: {len(mem.episodes)} | ST items: {len(mem.short_term)}")


q1 = "I want to build memory for an agent in Colab. What should I store and how do I retrieve it?"
out1 = agent.answer(q1)
print("\n" + "="*90)
print("Q1 REPLY\n")
print(out1["reply"][:1800])


q2 = "How do I avoid my agent repeating the same memory over and over?"
out2 = agent.answer(q2)
print("\n" + "="*90)
print("Q2 REPLY\n")
print(out2["reply"][:1800])


def simple_outcome_eval(text: str) -> float:
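   """Crude 0-1 score counting memory-policy keywords, so the demo can grade its own answer."""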
   hits = 0
   for kw in ["decay", "usage", "penalty", "novelty", "prune", "retrieve", "episodic", "semantic"]:
       if kw in text.lower():
           hits += 1
   return float(np.clip(hits/8.0, 0.0, 1.0))


score2 = simple_outcome_eval(out2["reply"])
mem.episode_add(
   task="Prevent repetitive recall in a memory-augmented agent",
   constraints={"must_be_simple": True, "runs_in_colab": True},
   plan=[
       "Track usage counts per memory item",
       "Apply usage-based penalty during ranking",
       "Boost novelty during storage to reduce duplicates",
       "Optionally prune low-salience memories"
   ],
   actions=[
       {"type":"design", "detail":"Added usage-based penalty 1/(1+alpha*usage)."},
       {"type":"design", "detail":"Used novelty = 1 - max_similarity at store time."}
   ],
   result=out2["reply"][:600],
   outcome_score=score2,
   lessons=[
       "Penalize overused memories during ranking (usage decay).",
       "Enforce novelty threshold at storage time to prevent duplicates.",
       "Keep episodic lessons distilled to avoid bloated recall context."
   ],
   failure_modes=[
       "No usage tracking, causing one high-similarity memory to dominate forever.",
       "Storing raw chat logs as LTM instead of distilled summaries."
   ],
   tags=["ranking","decay","policy"]
)
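
The usage-decay design recorded in this episode is easy to isolate. A minimal sketch of the 1/(1 + alpha*usage) ranking penalty mentioned above (decayed_score and its defaults are illustrative stand-ins, not the MemoryEngine's actual fields):

def decayed_score(base_score: float, usage: int, alpha: float = 0.3) -> float:
    # each recall increments usage, so a memory that keeps winning is
    # progressively discounted and other memories can surface
    return base_score / (1.0 + alpha * usage)

# A 0.92-similarity memory recalled 5 times now ranks below a fresh 0.60 one:
print(decayed_score(0.92, usage=5), decayed_score(0.60, usage=0))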


cons = mem.consolidate()
print("\n" + "="*90)
print("CONSOLIDATION RESULT:", cons)


print("\n" + "="*90)
print("LTM (top rows):")
display(mem.ltm_df().head(12))


print("\n" + "="*90)
print("EPISODES (top rows):")
display(mem.episodes_df().head(12))


def debug_retrieval(query: str):
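   """Show what retrieval returns: the built context plus scored semantic and episodic hits."""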
   pack = mem.retrieve(query)
   ctx = mem.build_context(query, pack)
   sem = []
   for mid, sc in pack["semantic_scored"]:
       it = mem.ltm[mid]
       sem.append({"mem_id": mid, "score": sc, "kind": it.kind, "salience": it.salience, "usage": it.usage, "text": it.text[:160]})
   ep = []
   for eid, sc in pack["episodic_scored"]:
       e = mem.episodes[eid]
       ep.append({"ep_id": eid, "score": sc, "outcome": e.outcome_score, "task": e.task[:140], "lessons": " | ".join(e.lessons[:4])})
   return ctx, pd.DataFrame(sem), pd.DataFrame(ep)


print("\n" + "="*90)
ctx, sem_df, ep_df = debug_retrieval("How do I design an agent memory policy for storage and retrieval?")
print(ctx[:1600])
print("\nTop semantic hits:")
display(sem_df)
print("\nTop episodic hits:")
display(ep_df)


print("\n✅ Done. You now have working short-term, long-term vector, and episodic memory with storage/retrieval policies in one Colab snippet.")


