Skip to content

Commit 23a6efa

Browse files
mmacpherson and claude committed
feat: update search data to Lucide v1.7.0, exclude search DB from wheel
- Rebase onto main (Lucide v1.7.0), regenerate descriptions for 9 new icons, and skip 18 removed brand icons (chromium, github, twitter, etc.)
- Exclude lucide-search.db from the wheel via wheel-exclude (363 KB vs 5.3 MB)
- Filter stale JSONL entries against the icons DB during build
- Read version metadata from the icons DB instead of JSONL records
- Add --icons-db flag to the `lucide build-search` CLI

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 640f2e6 commit 23a6efa

6 files changed

Lines changed: 118 additions & 43 deletions

File tree

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,3 +149,5 @@ source-include = [
149149
"tests/**",
150150
"src/lucide/data/*.db"
151151
]
152+
# Search DB is downloaded on first use; keep it out of the wheel
153+
wheel-exclude = ["**/lucide-search.db"]

src/lucide/build_search.py

Lines changed: 95 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -484,58 +484,24 @@ def _ensure_search_tables(conn: sqlite3.Connection) -> None:
484484
conn.commit()
485485

486486

487-
def build_search_db(
488-
jsonl_path: pathlib.Path,
487+
def _write_search_db( # noqa: PLR0913
489488
search_db_path: pathlib.Path,
489+
ordered_names: list[str],
490+
records: dict[str, DescriptionRecord],
491+
embeddings: list,
490492
clusters_path: pathlib.Path,
491493
*,
494+
version: str | None = None,
492495
verbose: bool = False,
493496
) -> None:
494-
"""Build the SQLite search database from descriptions, embeddings, and clusters.
495-
496-
Reads descriptions from *jsonl_path*, computes embeddings with fastembed,
497-
loads cluster assignments from *clusters_path*, and writes everything to
498-
*search_db_path*. The DB is rebuilt from scratch each time.
499-
500-
Args:
501-
jsonl_path: Input JSONL file with VLM descriptions.
502-
search_db_path: Output SQLite database path.
503-
clusters_path: JSON file with cluster assignments.
504-
verbose: Verbose logging.
505-
"""
497+
"""Write descriptions, embeddings, clusters, and metadata to SQLite."""
506498
import numpy as np # noqa: PLC0415
507-
from fastembed import TextEmbedding # noqa: PLC0415
508-
509-
records = load_descriptions_jsonl(jsonl_path)
510-
if not records:
511-
logger.warning("No descriptions found in %s", jsonl_path)
512-
return
513-
514-
ordered_names = sorted(records.keys())
515-
logger.info("Building search DB from %d descriptions", len(ordered_names))
516-
517-
# Build embedding input: name + tags + categories + description
518-
def _embedding_text(rec: DescriptionRecord) -> str:
519-
parts = [rec["name"].replace("-", " ")]
520-
if rec["tags"]:
521-
parts.append(f"Tags: {', '.join(rec['tags'])}")
522-
if rec["categories"]:
523-
parts.append(f"Categories: {', '.join(rec['categories'])}")
524-
parts.append(rec["description"])
525-
return f"{EMBEDDING_DOCUMENT_PREFIX}{'. '.join(parts)}"
526-
527-
texts = [_embedding_text(records[n]) for n in ordered_names]
528-
embedder = TextEmbedding(model_name=DEFAULT_EMBEDDING_MODEL)
529-
embeddings = list(embedder.embed(texts))
530-
logger.info("Computed %d embeddings", len(embeddings))
531499

532-
# Write SQLite
533500
search_db_path.parent.mkdir(parents=True, exist_ok=True)
534501
conn = sqlite3.connect(search_db_path)
535502
try:
536503
_ensure_search_tables(conn)
537504

538-
# Clear and repopulate
539505
conn.execute("DELETE FROM icon_descriptions")
540506
conn.execute("DELETE FROM icon_embeddings")
541507

@@ -568,11 +534,12 @@ def _embedding_text(rec: DescriptionRecord) -> str:
568534
(icon_name, int(cid), theme),
569535
)
570536

571-
# Metadata — use first record's lucide_version as representative
537+
# Metadata
572538
first = records[ordered_names[0]]
539+
resolved_version = version or first.get("lucide_version", "unknown")
573540
now = datetime.now(tz=timezone.utc).isoformat()
574541
for key, value in [
575-
("version", first.get("lucide_version", "unknown")),
542+
("version", resolved_version),
576543
("embedding_model", DEFAULT_EMBEDDING_MODEL),
577544
("embedding_dim", str(DEFAULT_EMBEDDING_DIM)),
578545
("description_model", DEFAULT_VLM_MODEL),
@@ -593,6 +560,91 @@ def _embedding_text(rec: DescriptionRecord) -> str:
593560
logger.info("Search DB written: %s (%.0f KB)", search_db_path, size_kb)
594561

595562

563+
def _read_icons_db_info(
    icons_db_path: pathlib.Path,
) -> tuple[set[str], str | None]:
    """Read icon names and version from the icons database.

    Args:
        icons_db_path: Path to an existing SQLite icons database. It is
            queried for an ``icons`` table with a ``name`` column and a
            ``metadata`` table with ``key``/``value`` columns.

    Returns:
        A ``(names, version)`` tuple: the set of all icon names, and the value
        stored under the ``version`` metadata key (``None`` when that row is
        absent).

    Raises:
        FileNotFoundError: If *icons_db_path* is not an existing file.
        sqlite3.Error: If the expected tables cannot be queried.
    """
    db_path = pathlib.Path(icons_db_path)
    # sqlite3.connect() silently creates an empty database for a missing path,
    # which would leave a stray file behind and surface later as an opaque
    # "no such table" error. Fail early with a clear message instead.
    if not db_path.is_file():
        raise FileNotFoundError(f"Icons database not found: {db_path}")

    conn = sqlite3.connect(db_path)
    try:
        names = {name for (name,) in conn.execute("SELECT name FROM icons")}
        row = conn.execute(
            "SELECT value FROM metadata WHERE key = 'version'"
        ).fetchone()
        return names, (row[0] if row else None)
    finally:
        conn.close()
576+
577+
578+
def build_search_db(
    jsonl_path: pathlib.Path,
    search_db_path: pathlib.Path,
    clusters_path: pathlib.Path,
    *,
    icons_db_path: pathlib.Path | None = None,
    verbose: bool = False,
) -> None:
    """Build the SQLite search database from descriptions, embeddings, and clusters.

    Reads descriptions from *jsonl_path*, computes embeddings with fastembed,
    loads cluster assignments from *clusters_path*, and writes everything to
    *search_db_path*. The DB is rebuilt from scratch each time.

    When *icons_db_path* is provided, only icons present in that database are
    included and the version metadata is read from it.

    If there is nothing to write (no descriptions at all, or every description
    is filtered out as stale), a warning is logged and the function returns
    without touching *search_db_path*.

    Args:
        jsonl_path: Input JSONL file with VLM descriptions.
        search_db_path: Output SQLite database path.
        clusters_path: JSON file with cluster assignments.
        icons_db_path: Optional icons database for filtering and version.
        verbose: Verbose logging.
    """
    from fastembed import TextEmbedding  # noqa: PLC0415

    records = load_descriptions_jsonl(jsonl_path)
    if not records:
        logger.warning("No descriptions found in %s", jsonl_path)
        return

    # Filter to only icons present in the icons DB when provided
    icons_version: str | None = None
    if icons_db_path is not None:
        valid_names, icons_version = _read_icons_db_info(icons_db_path)
        skipped = sorted(records.keys() - valid_names)
        if skipped:
            logger.info("Skipping %d stale descriptions: %s", len(skipped), skipped)
        ordered_names = sorted(n for n in records if n in valid_names)
    else:
        ordered_names = sorted(records)

    # The writer reads the first record for fallback metadata, so an empty
    # selection would fail with IndexError. Bail out before doing any
    # embedding work.
    if not ordered_names:
        logger.warning(
            "No descriptions in %s match icons in %s; search DB not built",
            jsonl_path,
            icons_db_path,
        )
        return

    logger.info("Building search DB from %d descriptions", len(ordered_names))

    # Build embedding input: name + tags + categories + description
    def _embedding_text(rec: DescriptionRecord) -> str:
        parts = [rec["name"].replace("-", " ")]
        if rec["tags"]:
            parts.append(f"Tags: {', '.join(rec['tags'])}")
        if rec["categories"]:
            parts.append(f"Categories: {', '.join(rec['categories'])}")
        parts.append(rec["description"])
        return f"{EMBEDDING_DOCUMENT_PREFIX}{'. '.join(parts)}"

    texts = [_embedding_text(records[n]) for n in ordered_names]
    embedder = TextEmbedding(model_name=DEFAULT_EMBEDDING_MODEL)
    embeddings = list(embedder.embed(texts))
    logger.info("Computed %d embeddings", len(embeddings))

    _write_search_db(
        search_db_path,
        ordered_names,
        records,
        embeddings,
        clusters_path,
        version=icons_version,
        verbose=verbose,
    )
646+
647+
596648
# ---------------------------------------------------------------------------
597649
# Combined pipeline
598650
# ---------------------------------------------------------------------------
@@ -638,6 +690,7 @@ def build_search_data( # noqa: PLR0913
638690
jsonl_path,
639691
search_db_path,
640692
clusters_path,
693+
icons_db_path=main_db_path,
641694
verbose=verbose,
642695
)
643696

src/lucide/cli.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -490,9 +490,15 @@ def _cmd_build_search(args: argparse.Namespace) -> int:
490490
args.output or descriptions_file.parent / "lucide-search.db"
491491
)
492492

493+
icons_db = pathlib.Path(args.icons_db) if args.icons_db else None
494+
493495
try:
494496
build_search_db(
495-
descriptions_file, search_db, clusters_file, verbose=args.verbose
497+
descriptions_file,
498+
search_db,
499+
clusters_file,
500+
icons_db_path=icons_db,
501+
verbose=args.verbose,
496502
)
497503
print(f"Search DB built → {search_db}")
498504
except Exception as e:
@@ -786,6 +792,11 @@ def main() -> int:
786792
help="Input JSON file with cluster assignments",
787793
required=True,
788794
)
795+
build_parser.add_argument(
796+
"--icons-db",
797+
help="Icons database for filtering and version metadata",
798+
default=None,
799+
)
789800
build_parser.add_argument(
790801
"-o",
791802
"--output",

src/lucide/data/gemini-icon-descriptions.jsonl

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1701,3 +1701,12 @@
17011701
{"name": "zodiac-virgo", "description": "This icon depicts the astrological symbol for Virgo. It represents virginity, maidenhood, and the harvest. It also evokes concepts of precision, analysis, and earthiness, often associated with this star sign and horoscope.", "model": "gemini-2.5-flash-lite", "prompt_template_hash": "7ae15a902cba", "lucide_version": "0.577.0", "tags": ["virgin", "maiden", "harvest", "precision", "earth", "analysis", "astrology", "star sign", "horoscope", "constellation", "celestial"], "categories": ["social", "emoji"], "timestamp": "2026-03-26T04:45:28.261407+00:00"}
17021702
{"name": "zoom-in", "description": "This icon visually represents a magnifying glass with a plus symbol inside its lens. It symbolizes the action of increasing detail, focusing on specifics, or making something larger to improve visibility and understanding. It conveys an idea of enhancement and closer examination.", "model": "gemini-2.5-flash-lite", "prompt_template_hash": "7ae15a902cba", "lucide_version": "0.577.0", "tags": ["magnifying glass", "plus"], "categories": ["accessibility", "layout", "design", "text", "photography"], "timestamp": "2026-03-26T04:45:27.741436+00:00"}
17031703
{"name": "zoom-out", "description": "This icon depicts a magnifying glass with a minus sign inside, symbolizing the action of zooming out. It represents a reduction in scale, offering a broader perspective or a step back from fine details, often associated with exploration and comprehension.", "model": "gemini-2.5-flash-lite", "prompt_template_hash": "7ae15a902cba", "lucide_version": "0.577.0", "tags": ["magnifying glass", "plus"], "categories": ["accessibility", "layout", "design", "text", "photography"], "timestamp": "2026-03-26T04:45:27.741498+00:00"}
1704+
{"name": "map-pin-search", "description": "This icon visually represents a map pin with a magnifying glass superimposed over it. It signifies finding or locating specific points on a map, searching for destinations, or pinpointing a particular place with precision.", "model": "gemini-2.5-flash-lite", "prompt_template_hash": "7ae15a902cba", "lucide_version": "1.7.0", "tags": ["location", "navigation", "travel", "waypoint", "marker", "drop"], "categories": ["text", "navigation", "travel", "account"], "timestamp": "2026-04-02T04:35:47.179475+00:00"}
1705+
{"name": "beef-off", "description": "This icon depicts a cut of beef, such as a steak, crossed out by a diagonal line. It represents the absence or exclusion of meat, signifying meat-free, vegetarian, or beef-free dishes and dietary restrictions on a menu.", "model": "gemini-2.5-flash-lite", "prompt_template_hash": "7ae15a902cba", "lucide_version": "1.7.0", "tags": ["food", "dish", "restaurant", "course", "meal", "meat", "bbq", "steak", "vegetarian"], "categories": ["food-beverage"], "timestamp": "2026-04-02T04:35:47.199205+00:00"}
1706+
{"name": "shield-cog-corner", "description": "This icon visually combines a shield and a cogwheel, suggesting robust security settings and advanced protection. It represents strong defense, vigilant system management, and the controlled application of safety measures for robust, secure operations.", "model": "gemini-2.5-flash-lite", "prompt_template_hash": "7ae15a902cba", "lucide_version": "1.7.0", "tags": ["cybersecurity", "secure", "safety", "protection", "guardian", "armored", "armoured", "defense", "defence", "defender", "block", "threat", "prevention", "antivirus", "vigilance", "vigilant", "detection", "scan", "find", "strength", "strong", "tough", "invincible", "invincibility", "invulnerable", "undamaged", "audit", "admin", "verification", "crest", "shieldcog", "bravery", "knight", "foot soldier", "infantry", "trooper", "pawn", "battle", "war", "military", "army", "cadet", "scout"], "categories": ["account", "security", "development", "gaming", "shapes"], "timestamp": "2026-04-02T04:35:47.248178+00:00"}
1707+
{"name": "radio-off", "description": "This icon visually represents a crossed-out signal radiating outwards, indicating the absence of a radio broadcast or signal. It conveys concepts of disconnection, muting, or the deactivation of live transmission and frequency.", "model": "gemini-2.5-flash-lite", "prompt_template_hash": "7ae15a902cba", "lucide_version": "1.7.0", "tags": ["signal", "broadcast", "connectivity", "live", "frequency"], "categories": ["devices", "multimedia", "social"], "timestamp": "2026-04-02T04:35:47.251911+00:00"}
1708+
{"name": "road", "description": "This icon depicts a stylized road or highway, with a dashed line dividing lanes. It represents travel, journey, direction, and progress, evoking concepts of navigation, routes, and transportation.", "model": "gemini-2.5-flash-lite", "prompt_template_hash": "7ae15a902cba", "lucide_version": "1.7.0", "tags": ["road", "street", "highway", "route", "path", "transport", "traffic", "drive", "map"], "categories": ["transportation"], "timestamp": "2026-04-02T04:35:47.267447+00:00"}
1709+
{"name": "shield-cog", "description": "This icon visually depicts a shield with a gear inside, symbolizing robust protection and mechanical or operational security. It represents concepts of defense, system integrity, administrative control, and the strength to withstand threats through vigilant oversight and proactive management.", "model": "gemini-2.5-flash-lite", "prompt_template_hash": "7ae15a902cba", "lucide_version": "1.7.0", "tags": ["cybersecurity", "secure", "safety", "protection", "guardian", "armored", "armoured", "defense", "defence", "defender", "block", "threat", "prevention", "antivirus", "vigilance", "vigilant", "detection", "scan", "find", "strength", "strong", "tough", "invincible", "invincibility", "invulnerable", "undamaged", "audit", "admin", "verification", "crest", "bravery", "knight", "foot soldier", "infantry", "trooper", "pawn", "battle", "war", "military", "army", "cadet", "scout"], "categories": ["account", "security", "development", "gaming", "shapes"], "timestamp": "2026-04-02T04:35:47.351638+00:00"}
1710+
{"name": "sport-shoe", "description": "This icon shows a stylized athletic shoe in motion, with a distinct sole and laces detail. It represents activities like running, training, and general fitness, evoking concepts of movement, speed, and active exercise.", "model": "gemini-2.5-flash-lite", "prompt_template_hash": "7ae15a902cba", "lucide_version": "1.7.0", "tags": ["footwear", "sports", "running", "athletic", "shoe", "sneaker", "training", "exercise", "fitness"], "categories": ["sports"], "timestamp": "2026-04-02T04:35:47.456484+00:00"}
1711+
{"name": "line-style", "description": "This icon visually represents a set of horizontal dashed lines, varying in length, with a solid line below them. It symbolizes different line styles, strokes, and borders, suggesting options for design or outlining elements. It conveys a sense of structure, pattern, and customizable aesthetics.", "model": "gemini-2.5-flash-lite", "prompt_template_hash": "7ae15a902cba", "lucide_version": "1.7.0", "tags": ["line", "stroke", "style", "dashed", "border"], "categories": ["design", "tools"], "timestamp": "2026-04-02T04:35:47.466965+00:00"}
1712+
{"name": "cctv-off", "description": "This icon shows a stylized camera lens crossed out by a diagonal line, indicating deactivation. It represents the absence of surveillance or recording, signifying a security system being offline, inactive, or disabled, suggesting a state of being unmonitored.", "model": "gemini-2.5-flash-lite", "prompt_template_hash": "7ae15a902cba", "lucide_version": "1.7.0", "tags": ["camera", "surveillance", "recording", "film", "videotape", "crime", "watching"], "categories": ["security", "devices", "communication", "connectivity", "photography"], "timestamp": "2026-04-02T04:35:47.629215+00:00"}

src/lucide/data/lucide-icons.db

-692 KB
Binary file not shown.

src/lucide/data/lucide-search.db

-40 KB
Binary file not shown.

0 commit comments

Comments
 (0)