Before refactoring the index store to the complex file-line pattern

This commit is contained in:
2026-03-12 16:13:44 +06:00
parent 491ccbea89
commit 8ba956d8c5
21 changed files with 7804 additions and 57 deletions

View File

@@ -1,2 +1,3 @@
{"age":31,"created":"2026-03-04T13:58:25+06:00","email":"john@example.com","id":1,"name":"John Doe"}
{"age":35,"created":"2026-03-04T13:58:25+06:00","email":"bob@example.com","id":3,"name":"Bob Johnson"}
{"age":31,"created":"2026-03-06T16:33:26+06:00","email":"john@example.com","id":1,"name":"John Doe"}
{"age":35,"created":"2026-03-06T16:33:26+06:00","email":"bob@example.com","id":3,"name":"Bob Johnson"}

View File

View File

@@ -0,0 +1,2 @@
{"id":1,"status":"processed","tenant":"A","ts":1773305152,"type":"signup"}
{"id":2,"status":"processed","tenant":"A","ts":1773305152,"type":"purchase"}

View File

@@ -0,0 +1 @@
{"id":3,"status":"new","tenant":"B","ts":1773305152,"type":"signup"}

View File

@@ -0,0 +1,2 @@
{"createdAt":"2026-03-12T14:45:52.66645719+06:00","email":"a@example.com","id":1,"name":"Alice"}
{"createdAt":"2026-03-12T14:45:52.666505533+06:00","email":"b@example.com","id":2,"name":"Bob"}

161
examples/partitions/main.go Normal file
View File

@@ -0,0 +1,161 @@
package main
import (
"fmt"
"log"
"os"
"path/filepath"
"time"
"linedb/pkg/linedb"
)
// Example of working with partitions plus an indexed collection.
//
// Run:
//	go run ./examples/partitions/main.go
//
// Important:
//   - In the current implementation, indexes are built for "regular" collections.
//   - Partitioned collections (partitions) are created dynamically and are not indexed yet
//     (see getPartitionAdapter: JSONLFileOptions{CollectionName: partitionName} without IndexedFields).
func main() {
dbDir := filepath.Join(".", "examples", "partitions", "data")
_ = os.RemoveAll(dbDir)
if err := os.MkdirAll(dbDir, 0755); err != nil {
log.Fatalf("mkdir: %v", err)
}
// Настройка:
// - users: обычная коллекция с индексами
// - events: базовая коллекция для партиций (сама по себе не используется для записи),
// а реальные данные будут в events_<tenant>.jsonl
initOptions := &linedb.LineDbInitOptions{
DBFolder: dbDir,
Collections: []linedb.JSONLFileOptions{
{
CollectionName: "users",
AllocSize: 512,
IndexedFields: []string{"id", "email"},
},
{
CollectionName: "events",
AllocSize: 512,
},
},
Partitions: []linedb.PartitionCollection{
{
CollectionName: "events",
PartIDFn: func(v any) string {
m, ok := v.(map[string]any)
if !ok {
return "unknown"
}
tenant, ok := m["tenant"].(string)
if !ok || tenant == "" {
return "unknown"
}
return tenant
},
},
},
}
db := linedb.NewLineDb(&linedb.LineDbOptions{
IndexStore: linedb.NewInMemoryIndexStore(),
})
if err := db.Init(true, initOptions); err != nil {
log.Fatalf("Init failed: %v", err)
}
defer db.Close()
fmt.Println("=== 1) Индексируемая коллекция users ===")
users := []any{
map[string]any{"id": 1, "email": "a@example.com", "name": "Alice", "createdAt": time.Now().Format(time.RFC3339Nano)},
map[string]any{"id": 2, "email": "b@example.com", "name": "Bob", "createdAt": time.Now().Format(time.RFC3339Nano)},
}
// Используем Write + DoIndexing: true, чтобы индекс был актуален сразу после записи.
if err := db.Write(users, "users", linedb.LineDbAdapterOptions{DoIndexing: true}); err != nil {
log.Fatalf("Write users failed: %v", err)
}
byEmail, err := db.ReadByFilter(map[string]any{"email": "a@example.com"}, "users", linedb.LineDbAdapterOptions{})
if err != nil {
log.Fatalf("ReadByFilter users by email failed: %v", err)
}
mustLen("users by email", byEmail, 1)
byID, err := db.ReadByFilter(map[string]any{"id": 2}, "users", linedb.LineDbAdapterOptions{})
if err != nil {
log.Fatalf("ReadByFilter users by id failed: %v", err)
}
mustLen("users by id", byID, 1)
fmt.Println("OK: users inserted and searchable by indexed fields (id/email).")
fmt.Println("\n=== 2) Партиционированная коллекция events (events_<tenant>) ===")
events := []any{
map[string]any{"id": 1, "tenant": "A", "type": "signup", "status": "new", "ts": time.Now().Unix()},
map[string]any{"id": 2, "tenant": "A", "type": "purchase", "status": "new", "ts": time.Now().Unix()},
map[string]any{"id": 3, "tenant": "B", "type": "signup", "status": "new", "ts": time.Now().Unix()},
}
if err := db.Insert(events, "events", linedb.LineDbAdapterOptions{}); err != nil {
log.Fatalf("Insert events failed: %v", err)
}
tenantA, err := db.ReadByFilter(map[string]any{"tenant": "A"}, "events", linedb.LineDbAdapterOptions{})
if err != nil {
log.Fatalf("ReadByFilter events tenant A failed: %v", err)
}
mustLen("events tenant A after insert", tenantA, 2)
tenantB, err := db.ReadByFilter(map[string]any{"tenant": "B"}, "events", linedb.LineDbAdapterOptions{})
if err != nil {
log.Fatalf("ReadByFilter events tenant B failed: %v", err)
}
mustLen("events tenant B after insert", tenantB, 1)
fmt.Println("OK: события разложены по партициям (A и B).")
fmt.Println("\n=== 3) Update по всем партициям ===")
updated, err := db.Update(
map[string]any{"status": "processed"},
"events",
map[string]any{"tenant": "A"},
linedb.LineDbAdapterOptions{},
)
if err != nil {
log.Fatalf("Update events failed: %v", err)
}
mustLen("updated events for tenant A", updated, 2)
processedA, err := db.ReadByFilter(map[string]any{"tenant": "A", "status": "processed"}, "events", linedb.LineDbAdapterOptions{})
if err != nil {
log.Fatalf("ReadByFilter processed events failed: %v", err)
}
mustLen("processed events for tenant A", processedA, 2)
fmt.Println("OK: обновление затронуло записи в партиции A.")
fmt.Println("\n=== 4) Delete по всем партициям ===")
deleted, err := db.Delete(map[string]any{"id": 3}, "events", linedb.LineDbAdapterOptions{})
if err != nil {
log.Fatalf("Delete events failed: %v", err)
}
mustLen("deleted events id=3", deleted, 1)
allRemaining, err := db.ReadByFilter(nil, "events", linedb.LineDbAdapterOptions{})
if err != nil {
log.Fatalf("ReadByFilter all remaining events failed: %v", err)
}
mustLen("remaining events after delete", allRemaining, 2)
fmt.Printf("\nГотово. Данные примера в: %s\n", dbDir)
}
func mustLen(label string, got []any, want int) {
if len(got) != want {
log.Fatalf("%s: expected %d, got %d (%v)", label, want, len(got), got)
}
}

View File

@@ -5,7 +5,7 @@
//
// Сценарий:
// 1. Вставка 5000 записей, каждая ~1800 символов.
// 2. 10 случайных обновлений, замер среднего времени.
// 2. N выборок ReadByFilter по индексируемому полю, замер среднего времени.
// 3. 100 случайных удалений, замер среднего времени.
// 4. Замеры памяти процесса через runtime.MemStats.
package main
@@ -15,6 +15,7 @@ import (
"log"
"math/rand"
"os"
"path/filepath"
"runtime"
"strings"
"time"
@@ -23,79 +24,152 @@ import (
)
const (
recordsCount = 5000
payloadSize = 1800
updateOps = 100
deleteOps = 100
recordsCount = 3000
payloadSize = 1024
readOps = 2000
updateOps = 500
deleteOps = 500
collectionName = "perf_items"
dbDir = "./data/perf-benchmark"
baseDBDir = "./data/perf-benchmark"
allocSizeEstimate = 2048
)
func main() {
rand.Seed(time.Now().UnixNano())
if err := os.MkdirAll(dbDir, 0755); err != nil {
if err := os.MkdirAll(baseDBDir, 0755); err != nil {
log.Fatalf("mkdir: %v", err)
}
fmt.Printf("LineDB perf сравнение: без индекса vs с индексом\n")
fmt.Printf("records=%d payload~%d readOps=%d updateOps=%d deleteOps=%d allocSize=%d\n\n",
recordsCount, payloadSize, readOps, updateOps, deleteOps, allocSizeEstimate)
noIdx := runScenario(false)
runtime.GC()
withIdx := runScenario(true)
fmt.Printf("\n=== Сводка (рядом) ===\n")
fmt.Printf("%-26s | %-18s | %-18s\n", "Метрика", "Без индекса", "С индексом")
fmt.Printf("%-26s | %-18v | %-18v\n", "Insert total", noIdx.insertTotal, withIdx.insertTotal)
fmt.Printf("%-26s | %-18v | %-18v\n", "Index build", noIdx.indexBuildTotal, withIdx.indexBuildTotal)
fmt.Printf("%-26s | %-18v | %-18v\n", "ReadByFilter avg", noIdx.readAvg, withIdx.readAvg)
fmt.Printf("%-26s | %-18v | %-18v\n", "Update avg", noIdx.updateAvg, withIdx.updateAvg)
fmt.Printf("%-26s | %-18v | %-18v\n", "Delete avg", noIdx.deleteAvg, withIdx.deleteAvg)
fmt.Printf("%-26s | %-18d | %-18d\n", "Final records", noIdx.finalCount, withIdx.finalCount)
fmt.Printf("%-26s | %-18.2f | %-18.2f\n", "Mem Alloc (MB)", noIdx.memAllocMB, withIdx.memAllocMB)
fmt.Printf("%-26s | %-18.2f | %-18.2f\n", "Mem Sys (MB)", noIdx.memSysMB, withIdx.memSysMB)
fmt.Printf("%-26s | %-18d | %-18d\n", "NumGC", noIdx.numGC, withIdx.numGC)
fmt.Printf("\nData directories:\n no-index: %s\n with-index:%s\n", noIdx.dbDir, withIdx.dbDir)
}
type scenarioResult struct {
dbDir string
insertTotal time.Duration
indexBuildTotal time.Duration
readAvg time.Duration
updateAvg time.Duration
deleteAvg time.Duration
finalCount int
memAllocMB float64
memSysMB float64
numGC uint32
}
func runScenario(useIndex bool) scenarioResult {
label := "no-index"
if useIndex {
label = "with-index"
}
dbDir := filepath.Join(baseDBDir, label)
_ = os.RemoveAll(dbDir)
var store linedb.IndexStore
lineOpts := linedb.LineDbOptions{}
collOpts := linedb.JSONLFileOptions{
CollectionName: collectionName,
AllocSize: allocSizeEstimate,
}
if useIndex {
// Индексируем поля id и index (по ним идут фильтры в тесте)
collOpts.IndexedFields = []string{"id", "index"}
store = linedb.NewInMemoryIndexStore()
lineOpts.IndexStore = store
}
initOptions := &linedb.LineDbInitOptions{
CacheSize: 1000,
CacheTTL: time.Minute,
DBFolder: dbDir,
Collections: []linedb.JSONLFileOptions{
{
CollectionName: collectionName,
AllocSize: allocSizeEstimate,
},
collOpts,
},
}
db := linedb.NewLineDb(nil)
db := linedb.NewLineDb(&lineOpts)
if err := db.Init(true, initOptions); err != nil {
log.Fatalf("Init failed: %v", err)
log.Fatalf("[%s] Init failed: %v", label, err)
}
defer db.Close()
fmt.Printf("=== %s ===\n", label)
printMem("Before insert")
fmt.Printf("1) Insert %d records (payload ~%d chars)...\n", recordsCount, payloadSize)
fmt.Printf("1) Insert %d records...\n", recordsCount)
start := time.Now()
if err := insertRecords(db); err != nil {
log.Fatalf("InsertRecords failed: %v", err)
log.Fatalf("[%s] insertRecords failed: %v", label, err)
}
elapsedInsert := time.Since(start)
fmt.Printf(" Total insert time: %v, per record: %v\n",
elapsedInsert, elapsedInsert/time.Duration(recordsCount))
insertDur := time.Since(start)
fmt.Printf(" Total insert time: %v, per record: %v\n", insertDur, insertDur/time.Duration(recordsCount))
// Индекс строится внутри Write при DoIndexing: true (точечная индексация)
printMem("After insert")
all, err := db.Read(collectionName, linedb.LineDbAdapterOptions{})
if err != nil {
log.Fatalf("Read after insert failed: %v", err)
log.Fatalf("[%s] Read after insert failed: %v", label, err)
}
fmt.Printf(" Records in collection: %d\n", len(all))
ids := collectIDs(all)
if len(ids) == 0 {
log.Fatalf("No IDs collected, cannot continue")
indexVals := collectFieldValues(all, "index")
if len(ids) == 0 || len(indexVals) == 0 {
log.Fatalf("[%s] No IDs or index values collected, cannot continue", label)
}
fmt.Printf("\n2) Random update of %d records...\n", updateOps)
fmt.Printf("\n2) Random ReadByFilter of %d ops (field=index)...\n", readOps)
readAvg := benchmarkReads(db, indexVals, readOps)
fmt.Printf(" Average ReadByFilter time: %v\n", readAvg)
fmt.Printf("\n3) Random update of %d records...\n", updateOps)
avgUpdate := benchmarkUpdates(db, ids, updateOps)
fmt.Printf(" Average update time: %v\n", avgUpdate)
printMem("After updates")
fmt.Printf("\n3) Random delete of %d records...\n", deleteOps)
fmt.Printf("\n4) Random delete of %d records...\n", deleteOps)
avgDelete := benchmarkDeletes(db, ids, deleteOps)
fmt.Printf(" Average delete time: %v\n", avgDelete)
printMem("After deletes")
final, err := db.Read(collectionName, linedb.LineDbAdapterOptions{})
if err != nil {
log.Fatalf("Final read failed: %v", err)
log.Fatalf("[%s] Final read failed: %v", label, err)
}
fmt.Printf("\nFinal records in collection: %d\n", len(final))
fmt.Printf("Data directory: %s\n", dbDir)
mem := memSnapshot()
return scenarioResult{
dbDir: dbDir,
insertTotal: insertDur,
indexBuildTotal: 0, // индекс строится точечно при Write с DoIndexing
readAvg: readAvg,
updateAvg: avgUpdate,
deleteAvg: avgDelete,
finalCount: len(final),
memAllocMB: mem.allocMB,
memSysMB: mem.sysMB,
numGC: mem.numGC,
}
}
func insertRecords(db *linedb.LineDb) error {
@@ -105,20 +179,21 @@ func insertRecords(db *linedb.LineDb) error {
batch := make([]any, 0, 100)
for i := 0; i < recordsCount; i++ {
rec := map[string]any{
"id": i + 1,
"index": i,
"payload": base,
"created": time.Now().Format(time.RFC3339Nano),
}
batch = append(batch, rec)
if len(batch) >= cap(batch) {
if err := db.Insert(batch, collectionName, linedb.LineDbAdapterOptions{}); err != nil {
if err := db.Write(batch, collectionName, linedb.LineDbAdapterOptions{DoIndexing: true}); err != nil {
return err
}
batch = batch[:0]
}
}
if len(batch) > 0 {
if err := db.Insert(batch, collectionName, linedb.LineDbAdapterOptions{}); err != nil {
if err := db.Write(batch, collectionName, linedb.LineDbAdapterOptions{DoIndexing: true}); err != nil {
return err
}
}
@@ -137,6 +212,37 @@ func collectIDs(all []any) []any {
return ids
}
func collectFieldValues(all []any, field string) []any {
vals := make([]any, 0, len(all))
for _, r := range all {
if m, ok := r.(map[string]any); ok {
if v, ok := m[field]; ok {
vals = append(vals, v)
}
}
}
return vals
}
func benchmarkReads(db *linedb.LineDb, values []any, ops int) time.Duration {
if len(values) == 0 || ops == 0 {
return 0
}
var total time.Duration
for i := 0; i < ops; i++ {
v := values[rand.Intn(len(values))]
start := time.Now()
_, err := db.ReadByFilter(map[string]any{"index": v}, collectionName, linedb.LineDbAdapterOptions{})
dur := time.Since(start)
if err != nil {
log.Printf("ReadByFilter error (index=%v): %v", v, err)
continue
}
total += dur
}
return total / time.Duration(ops)
}
func benchmarkUpdates(db *linedb.LineDb, ids []any, ops int) time.Duration {
if len(ids) == 0 {
return 0
@@ -204,3 +310,19 @@ func printMem(label string) {
fmt.Printf(" Sys: %.2f MB\n", float64(m.Sys)/1024.0/1024.0)
fmt.Printf(" NumGC: %d\n", m.NumGC)
}
type memSnap struct {
allocMB float64
sysMB float64
numGC uint32
}
func memSnapshot() memSnap {
var m runtime.MemStats
runtime.ReadMemStats(&m)
return memSnap{
allocMB: float64(m.Alloc) / 1024.0 / 1024.0,
sysMB: float64(m.Sys) / 1024.0 / 1024.0,
numGC: m.NumGC,
}
}