Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions src/google/adk/memory/in_memory_memory_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def _user_key(app_name: str, user_id: str) -> str:


def _extract_words_lower(text: str) -> set[str]:
"""Extracts words from a string and converts them to lowercase."""
return set([word.lower() for word in re.findall(r'[A-Za-z]+', text)])
"""Extracts Unicode-aware tokens from a string in lowercase."""
return set(word.lower() for word in re.findall(r'\w+', text))


class InMemoryMemoryService(BaseMemoryService):
Expand Down Expand Up @@ -116,13 +116,19 @@ async def search_memory(
for event in session_events:
if not event.content or not event.content.parts:
continue
words_in_event = _extract_words_lower(
' '.join([part.text for part in event.content.parts if part.text])
event_text = ' '.join(
[part.text for part in event.content.parts if part.text]
)
words_in_event = _extract_words_lower(event_text)
if not words_in_event:
continue

if any(query_word in words_in_event for query_word in words_in_query):
event_text_lower = event_text.lower()
if any(
query_word in words_in_event
or (not query_word.isascii() and query_word in event_text_lower)
for query_word in words_in_query
):
response.memories.append(
MemoryEntry(
content=event.content,
Expand Down
50 changes: 50 additions & 0 deletions tests/unittests/memory/test_in_memory_memory_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,3 +327,53 @@ async def test_search_memory_is_scoped_by_user():
assert (
result_other_user.memories[0].content.parts[0].text == 'This is a secret.'
)


# --- Non-Latin language tests ---


@pytest.mark.asyncio
@pytest.mark.parametrize(
    'event_text,query,expected_count',
    [
        # Japanese: written without spaces, so a hit relies on the
        # substring fallback rather than on a token match.
        ('私の名前は太郎です', '太郎', 1),
        ('私の名前は太郎です', '天気', 0),
        # Chinese: also written without spaces (substring fallback).
        ('我喜欢机器学习', '机器学习', 1),
        ('我喜欢机器学习', '天气预报', 0),
        # Korean: space-delimited, matched as a whole token.
        ('제 이름은 민수입니다', '민수입니다', 1),
        # Cyrillic: space-delimited, matched as a whole token.
        ('Меня зовут Алексей', 'Алексей', 1),
        # Mixed content: a non-Latin query and a Latin query against the
        # same event.
        ('太郎 works at ABC Corp', '太郎', 1),
        ('太郎 works at ABC Corp', 'ABC', 1),
        # Regression guard: a fragment of a Latin word must NOT match.
        ('I like to code in Python.', 'thon', 0),
    ],
)
async def test_search_memory_non_latin(event_text, query, expected_count):
    """Checks search_memory hit counts for non-Latin and mixed-script text.

    Each case stores one session holding a single user event whose text is
    ``event_text``, searches for ``query``, and expects exactly
    ``expected_count`` memories back.
    """
    # A single event carrying the parametrized text as its only part.
    i18n_content = types.Content(parts=[types.Part(text=event_text)])
    i18n_event = Event(
        id='event-i18n',
        invocation_id='inv-i18n',
        author='user',
        timestamp=90000,
        content=i18n_content,
    )
    i18n_session = Session(
        app_name=MOCK_APP_NAME,
        user_id=MOCK_USER_ID,
        id='session-i18n',
        last_update_time=7000,
        events=[i18n_event],
    )

    service = InMemoryMemoryService()
    await service.add_session_to_memory(i18n_session)

    search_response = await service.search_memory(
        app_name=MOCK_APP_NAME,
        user_id=MOCK_USER_ID,
        query=query,
    )
    assert len(search_response.memories) == expected_count