Дата: 2026-01-02
Статус: Проектный документ
Уровень: Архитектор
Построить систему документации, которая объединяет:
1. Портативность Git (source control, versioning)
2. Гибкость блоков (Notion-like flexibility)
3. Простоту Markdown (легко читать/писать)
4. Сотрудничество (real-time collaboration)
5. Расширяемость (plugins, custom blocks)
┌──────────────────────────────────────────────────────┐
│ Layer 1: Git Storage │
│ (Source of truth: markdown + config) │
└──────────────────────────────────────────────────────┘
↓
┌──────────────────────────────────────────────────────┐
│ Layer 2: Database + Cache │
│ (Metadata, search index, real-time state) │
└──────────────────────────────────────────────────────┘
↓
┌──────────────────────────────────────────────────────┐
│ Layer 3: Block Parser & Renderer │
│ (Markdown → Block AST → UI) │
└──────────────────────────────────────────────────────┘
↓
┌──────────────────────────────────────────────────────┐
│ Layer 4: API (REST + WebSocket) │
│ (CRUD, real-time, integrations) │
└──────────────────────────────────────────────────────┘
↓
┌──────────────────────────────────────────────────────┐
│ Layer 5: Editor UI │
│ (Block editor + Markdown view + Collaboration) │
└──────────────────────────────────────────────────────┘
docs-repo/
├── .docs.config.yaml # System config
├── .gitignore
│
├── _collections/
│ ├── config.yaml # Collection metadata
│ └── index.yaml # Collection TOC
│
├── documents/
│ ├── doc-001/
│ │ ├── meta.yaml # Document metadata
│ │ ├── content.md # Main document
│ │ ├── attachments/
│ │ │ ├── image1.png
│ │ │ └── video1.mp4
│ │ └── comments.json # Comments (separate)
│ ├── doc-002/
│ └── ...
│
├── _media/ # Shared assets
│ ├── images/
│ ├── videos/
│ └── icons/
│
└── _archived/ # Old documents
# Example meta.yaml: per-document metadata stored next to content.md.
id: "doc-001"
title: "Getting Started Guide"
description: "Quick start for new users"

# Hierarchy
parent_id: null  # null = root document
children_ids: [doc-002, doc-003]
position: 1  # Order in parent

# Authorship & versioning
created_by: "user@example.com"
created_at: "2026-01-01T10:00:00Z"
updated_by: "editor@example.com"
updated_at: "2026-01-02T15:30:00Z"

# Metadata
tags: [guide, onboarding, v1.0]
category: "tutorials"
status: "published"  # draft, published, archived

# Permissions
owner: "user@example.com"
viewers: ["team@example.com"]
editors: ["editor@example.com"]

# Content references
content_file: "content.md"
attachments: [image1.png, video1.mp4]

# Custom fields: free-form keys live inside this mapping. Without the
# indentation, custom_fields would be null and the keys below would become
# top-level document keys.
custom_fields:
  version: "1.0"
  difficulty: "beginner"
  estimated_time: "15 mins"
# Getting Started
## Introduction
[Standard markdown content]
## Installation
[Steps...]
<!-- Custom block syntax -->
<docs:callout type="info">
**Info:** This is important
</docs:callout>
<docs:code-block language="python">
def hello():
print("world")
</docs:code-block>
<docs:embed type="video" src="attachments/video1.mp4" />
<docs:table-of-contents />
<docs:alert type="warning">
Be careful!
</docs:alert>
[Rest of markdown...]
# System settings
system:
  name: "Documentation System"
  version: "1.0"
  db_type: "postgresql"

# Storage
storage:
  backend: "git"
  git_remote: "https://github.com/org/docs"
  sync:
    direction: "bidirectional"  # "one-way" or "bidirectional"
    interval: 300  # seconds
    conflict_strategy: "last-write-wins"

# Editor settings
editor:
  mode: "block-based"  # or "markdown"
  enable_wysiwyg: true
  enable_source_view: true
  # Illustrative subset; add further block types here. A literal "..." entry
  # would be parsed as a block type named "...", so it is kept out of the list.
  block_types: [paragraph, heading, code, image, table]

# Search
search:
  engine: "elasticsearch"
  index_attachments: true
  fulltext_enabled: true

# Collaboration
collaboration:
  realtime_enabled: true
  max_concurrent_edits: 10
  comment_system: "threaded"

# Export/Import
export:
  formats: [markdown, html, pdf, json]
  include_attachments: true

# Permissions
permissions:
  default_viewer: ["authenticated"]
  default_editor: ["team"]
  owner_only: ["delete", "transfer_ownership"]
-- Documents table
-- One row per document. parent_id forms the hierarchy (NULL parent = root);
-- deleting a parent removes its descendants via ON DELETE CASCADE.
CREATE TABLE documents (
    id            UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    parent_id     UUID REFERENCES documents(id) ON DELETE CASCADE,
    title         VARCHAR(255) NOT NULL,
    description   TEXT,
    content       TEXT NOT NULL,                 -- Markdown
    created_by    UUID NOT NULL REFERENCES users(id),
    created_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_by    UUID NOT NULL REFERENCES users(id),
    updated_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    status        VARCHAR(20) DEFAULT 'draft',   -- draft, published, archived
    category      VARCHAR(100),
    tags          TEXT[] DEFAULT '{}',
    position      INTEGER,                       -- Order in parent
    version_hash  VARCHAR(64),                   -- Git commit hash
    metadata      JSONB,                         -- Custom fields
    -- description is nullable and `text || NULL` is NULL, so without
    -- coalesce() a document with no description would get a NULL vector and
    -- never match a search, even on its title.
    search_vector tsvector GENERATED ALWAYS AS (
        to_tsvector('english', coalesce(title, '') || ' ' || coalesce(description, ''))
    ) STORED
);

CREATE INDEX documents_parent_idx   ON documents(parent_id);
CREATE INDEX documents_search_idx   ON documents USING GIN(search_vector);
CREATE INDEX documents_status_idx   ON documents(status);
CREATE INDEX documents_category_idx ON documents(category);
-- Versions table (Git commits)
-- One row per recorded version of a document, keyed to a Git commit hash.
CREATE TABLE versions (
    id              UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- documents.parent_id cascades on delete; without a matching action here
    -- any document that has version rows could not be deleted.
    -- NOTE(review): this drops DB snapshots with the document; Git is
    -- described as the source of truth — confirm that is acceptable.
    document_id     UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    git_commit_hash VARCHAR(64) NOT NULL,
    title           VARCHAR(255),
    message         TEXT,
    author_id       UUID NOT NULL REFERENCES users(id),
    created_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    content         TEXT,                        -- Snapshot
    metadata        JSONB
);

CREATE INDEX versions_document_idx ON versions(document_id);
CREATE INDEX versions_commit_idx   ON versions(git_commit_hash);
-- Blocks table (parsed markdown blocks)
-- Blocks nest through parent_block_id and are ordered by position.
CREATE TABLE blocks (
    id              UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Blocks are derived from the document's markdown, so they are removed
    -- with it; this also keeps the cascading delete on documents.parent_id
    -- from being rejected by this foreign key.
    document_id     UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    parent_block_id UUID REFERENCES blocks(id),
    type            VARCHAR(50),                 -- paragraph, heading, code, image, etc.
    content         TEXT,                        -- Block content (JSON or text)
    properties      JSONB,                       -- Block metadata
    position        INTEGER,                     -- Order in parent
    created_by      UUID REFERENCES users(id),
    created_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_by      UUID REFERENCES users(id),
    updated_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX blocks_document_idx ON blocks(document_id);
CREATE INDEX blocks_parent_idx   ON blocks(parent_block_id);
CREATE INDEX blocks_type_idx     ON blocks(type);
-- Comments table
-- A comment belongs to a document, may be anchored to a block, and may
-- reply to another comment (parent_comment_id) to form a thread.
CREATE TABLE comments (
    id                UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Comments cannot outlive their document (NOT NULL), so they are removed
    -- with it instead of blocking the cascading delete on documents.
    document_id       UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    block_id          UUID REFERENCES blocks(id),
    parent_comment_id UUID REFERENCES comments(id),
    author_id         UUID NOT NULL REFERENCES users(id),
    content           TEXT NOT NULL,
    created_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    resolved          BOOLEAN DEFAULT false
);

CREATE INDEX comments_document_idx ON comments(document_id);
CREATE INDEX comments_block_idx    ON comments(block_id);
CREATE INDEX comments_thread_idx   ON comments(parent_comment_id);
-- Attachments table
-- Metadata for files attached to a document; the bytes live at storage_path.
CREATE TABLE attachments (
    id           UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Removed together with the document so the cascading delete on
    -- documents is not rejected. Only the row is deleted: the object at
    -- storage_path must be cleaned up by the application.
    document_id  UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    filename     VARCHAR(255) NOT NULL,
    mimetype     VARCHAR(100),
    size_bytes   BIGINT,
    storage_path VARCHAR(255),                   -- S3 or local path
    url          VARCHAR(500),                   -- Public URL
    created_by   UUID REFERENCES users(id),
    created_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX attachments_document_idx ON attachments(document_id);
-- Permissions table
-- A grant on one document to a user and/or a group; the CHECK requires at
-- least one of the two grantee columns to be set.
CREATE TABLE permissions (
    id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Grants are meaningless without their document, so they are removed
    -- with it instead of blocking the cascading delete on documents.
    document_id UUID NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    user_id     UUID REFERENCES users(id),
    group_id    UUID REFERENCES groups(id),
    permission  VARCHAR(20),                     -- view, edit, admin, own
    granted_by  UUID REFERENCES users(id),
    granted_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    expires_at  TIMESTAMP,                       -- Optional expiration
    CHECK (user_id IS NOT NULL OR group_id IS NOT NULL)
);

CREATE INDEX permissions_document_idx ON permissions(document_id);
CREATE INDEX permissions_user_idx     ON permissions(user_id);
-- Group grants are looked up the same way as user grants.
CREATE INDEX permissions_group_idx    ON permissions(group_id);
-- Activity log (audit trail)
CREATE TABLE activity_log (
    id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Nullable on purpose: the audit entry is kept when its document is
    -- deleted. SET NULL makes that work; the default action would reject
    -- the delete instead.
    document_id UUID REFERENCES documents(id) ON DELETE SET NULL,
    action      VARCHAR(50),                     -- created, updated, deleted, commented, etc.
    actor_id    UUID NOT NULL REFERENCES users(id),
    details     JSONB,                           -- What changed
    created_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX activity_log_document_idx ON activity_log(document_id);
CREATE INDEX activity_log_created_idx  ON activity_log(created_at);
Cache keys:
├── document:{doc_id} → Parsed blocks (TTL: 1 hour)
├── document:{doc_id}:metadata → Document metadata
├── search:index → Full-text index
├── user:{user_id}:permissions → User permissions (TTL: 30 min)
├── document:{doc_id}:collaborators → Active editors (TTL: 5 min)
└── document:{doc_id}:version → Git version info
# Block type taxonomy: category -> list of block type identifiers.
blockTypes:
  text:
    - paragraph
    # One identifier per entry; "heading_1, heading_2, heading_3" on a single
    # line would be one string, not three types.
    - heading_1
    - heading_2
    - heading_3
    - quote
  list:
    - bulleted_list
    - numbered_list
    - checklist
    - toggle
  media:
    - image
    - video
    - audio
    - file
  data:
    - table
    - database  # Inline database
    - code_block
  layout:
    - columns
    - tabs
    - callout
    - alert
  interactive:
    - button
    - embed
    - iframe
  # User-defined via plugins; empty by default.
  custom: []
class MarkdownParser:
    """Convert Markdown text into a list of Block objects.

    Pipeline: tokenize -> parse tokens into an AST -> convert AST to blocks.
    ``tokenize``, ``parse_tokens`` and ``node_type_to_block_type`` are called
    on ``self`` but are not defined in this excerpt.
    """

    def parse(self, markdown: str) -> "List[Block]":
        """Parse markdown to block AST.

        Args:
            markdown: Markdown source text.

        Returns:
            The blocks produced by ``ast_to_blocks`` for the parsed AST.
        """
        # 1. Tokenize
        tokens = self.tokenize(markdown)
        # 2. Parse to AST
        ast = self.parse_tokens(tokens)
        # 3. Convert to blocks
        return self.ast_to_blocks(ast)

    def ast_to_blocks(self, ast: "AST") -> "List[Block]":
        """Convert the children of an AST node to Block objects, recursively.

        Args:
            ast: A node exposing ``children``; each child exposes ``type`` and
                ``content``. A missing or empty ``children`` yields ``[]``.

        Returns:
            One Block per child, with that child's own children nested in it.
        """
        blocks = []
        for node in getattr(ast, "children", None) or ():
            blocks.append(
                Block(
                    type=self.node_type_to_block_type(node.type),
                    content=node.content,
                    # Renderers call properties.get(...), so this must be a
                    # dict. NOTE(review): assumes nodes may carry a
                    # ``properties`` mapping — confirm the attribute name.
                    properties=dict(getattr(node, "properties", None) or {}),
                    # Recurse on the node itself: this method reads
                    # ``.children`` from its argument, so passing the child
                    # list would fail.
                    children=self.ast_to_blocks(node),
                )
            )
        return blocks
class BlockRenderer:
    """Render a sequence of blocks by delegating to a per-type renderer.

    ``get_renderer`` is called on ``self`` but is not defined in this excerpt.
    """

    def render(self, blocks: "List[Block]") -> "HTML":
        """Render blocks to HTML.

        Args:
            blocks: Blocks to render, in output order.

        Returns:
            The concatenated output of each block's renderer ("" when empty).
        """
        # ``self`` must be declared: the body looks up the renderer on it.
        return "".join(
            self.get_renderer(block.type).render(block) for block in blocks
        )
# Example renderers
class ParagraphRenderer:
    """Render a paragraph block as a ``<p>`` element."""

    def render(self, block: "Block") -> "HTML":
        """Return ``<p>…</p>`` with the block content HTML-escaped.

        NOTE(review): assumes ``block.content`` is plain text; if inline
        formatting is already rendered to HTML upstream, escape there instead.
        """
        from html import escape  # local import: the excerpt has no import section

        return f"<p>{escape(block.content or '', quote=False)}</p>"
class HeadingRenderer:
    """Render a heading block as ``<h1>``..``<h6>``."""

    def render(self, block: "Block") -> "HTML":
        """Return the heading element for ``block``.

        The level comes from ``block.properties["level"]`` (default 1). It is
        coerced to an int and clamped to 1..6 because it is interpolated into
        the tag name; the content is HTML-escaped.
        """
        from html import escape  # local import: the excerpt has no import section

        try:
            level = int(block.properties.get("level", 1))
        except (TypeError, ValueError):
            level = 1
        level = min(max(level, 1), 6)
        return f"<h{level}>{escape(block.content or '', quote=False)}</h{level}>"
class CodeBlockRenderer:
    """Render a code block as ``<pre><code class="language-…">``."""

    def render(self, block: "Block") -> "HTML":
        """Return the ``<pre><code>`` element for ``block``.

        The language comes from ``block.properties["language"]`` (default
        "text"). Both the code and the language are escaped: source code
        routinely contains ``<`` and ``&``. No whitespace is emitted between
        ``<pre>`` and ``<code>``, since whitespace inside ``<pre>`` is
        rendered.
        """
        from html import escape  # local import: the excerpt has no import section

        language = escape(str(block.properties.get("language", "text")), quote=True)
        code = escape(block.content or "", quote=False)
        return f'<pre><code class="language-{language}">{code}</code></pre>'
# Documents
GET /api/v1/documents # List
POST /api/v1/documents # Create
GET /api/v1/documents/{id} # Read
PATCH /api/v1/documents/{id} # Update (partial)
PUT /api/v1/documents/{id} # Update (full)
DELETE /api/v1/documents/{id} # Delete
# Document hierarchy
GET /api/v1/documents/{id}/children
POST /api/v1/documents/{id}/children
PATCH /api/v1/documents/{id}/reorder
# Blocks
GET /api/v1/documents/{id}/blocks
POST /api/v1/documents/{id}/blocks
PATCH /api/v1/blocks/{block_id}
DELETE /api/v1/blocks/{block_id}
# Versions
GET /api/v1/documents/{id}/versions
GET /api/v1/documents/{id}/versions/{commit_hash}
POST /api/v1/documents/{id}/versions/{commit_hash}/restore
# Comments
GET /api/v1/documents/{id}/comments
POST /api/v1/documents/{id}/comments
PATCH /api/v1/comments/{comment_id}
DELETE /api/v1/comments/{comment_id}
# Search
GET /api/v1/search?q=query&limit=10
# Attachments
POST /api/v1/documents/{id}/attachments (upload)
GET /api/v1/attachments/{id} (download)
DELETE /api/v1/attachments/{id}
# Permissions
GET /api/v1/documents/{id}/permissions
POST /api/v1/documents/{id}/permissions
DELETE /api/v1/documents/{id}/permissions/{perm_id}
# Collaboration
GET /api/v1/documents/{id}/collaborators # Who's editing now
// Client connects
ws.connect('ws://server/api/v1/documents/{id}/collaborate')
// Server broadcasts changes
{
type: "document_changed",
change: {
id: "change-123",
blocks: [
{id: "block-1", type: "update", content: "new text"},
{id: "block-2", type: "insert", position: 1, content: {...}}
],
user_id: "user-xyz",
timestamp: 1234567890
}
}
// Client sends local changes
{
type: "document_change",
change: {
blocks: [
{id: "block-3", type: "update", content: "my change"}
]
}
}
// Real-time cursor tracking
{
type: "cursor_position",
cursor: {
user_id: "user-abc",
block_id: "block-1",
position: 15,
selection: {start: 10, end: 20}
}
}
┌─────────────────────────────────────┐
│ Editor UI (React/Vue) │
├─────────────────────────────────────┤
│ BlockEditor Component │
│ ├─ Block List View │
│ ├─ Block Editor (WYSIWYG) │
│ ├─ Comments Sidebar │
│ └─ Toolbar + Formatting │
├─────────────────────────────────────┤
│ Markdown Source View │
├─────────────────────────────────────┤
│ Real-time Collaboration │
│ ├─ Active cursors │
│ ├─ Conflict resolution │
│ └─ Change detection │
└─────────────────────────────────────┘
/** Client-side state held by the editor for one open document. */
interface EditorState {
  /** Identifier of the document being edited. */
  documentId: string;
  /** Blocks currently loaded for the document. */
  blocks: Block[];

  // --- UI state ---
  /** Currently selected block, if any. */
  selectedBlockId?: string;
  /** Active view: WYSIWYG, Markdown source, or both side by side. */
  mode: 'wysiwyg' | 'markdown' | 'split';

  // --- Collaboration state ---
  collaborators: User[];
  remoteChanges: Change[];
  localChanges: Change[];

  // --- History ---
  history: Change[];
  /** NOTE(review): presumably the current position within `history` — confirm. */
  historyIndex: number;
}
Local edits in UI
↓ (WebSocket → API)
Change captured in DB
↓ (every 30 seconds OR on save)
Commit to Git
↓
Git push to remote
↓
Webhook triggers
↓
Other clients receive update
Scenario:
User A: Edit document locally
User B: Push to Git in parallel
Resolution:
1. Detect conflict (version hash mismatch)
2. Fetch latest from Git
3. Merge (prefer Git as source of truth)
4. Notify User A: "Document was updated, please refresh"
5. Sync UI
version: '3.8'

services:
  postgres:
    image: postgres:15
    environment:
      POSTGRES_DB: docs_db
      # Must match the credentials in the api service's DATABASE_URL; without
      # POSTGRES_USER the image only creates the default "postgres" role.
      POSTGRES_USER: user
      # Development placeholder: supply a real secret from outside VCS.
      POSTGRES_PASSWORD: password
    volumes:
      - postgres_data:/var/lib/postgresql/data

  redis:
    image: redis:7
    ports:
      - "6379:6379"

  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.0.0
    environment:
      - discovery.type=single-node
      # 8.x turns on TLS and authentication by default; disabled so the plain
      # http:// ELASTICSEARCH_URL used by the api works (development only).
      - xpack.security.enabled=false
    ports:
      - "9200:9200"

  api:
    build: ./backend
    ports:
      - "8080:8080"
    depends_on:
      - postgres
      - redis
      - elasticsearch
    environment:
      DATABASE_URL: postgresql://user:password@postgres/docs_db
      REDIS_URL: redis://redis:6379
      ELASTICSEARCH_URL: http://elasticsearch:9200

  frontend:
    build: ./frontend
    ports:
      - "3000:3000"
    environment:
      API_URL: http://api:8080

  git-sync:
    build: ./git-sync
    depends_on:
      - postgres
      - api
    environment:
      GIT_REMOTE: https://github.com/org/docs
      SYNC_INTERVAL: "300"  # seconds; quoted because env values are strings

# Named volumes referenced by services must be declared at the top level.
volumes:
  postgres_data: {}
# Application
- Documents created/updated/deleted per minute
- API response time (p50, p95, p99)
- WebSocket connections count
- Real-time collaboration latency
# Infrastructure
- DB query time
- Cache hit ratio
- Git sync success rate
- Search indexing latency
# User activity
- Active users
- Documents viewed
- Comments per document
- Collaborators count
Structured logs (JSON):
{
"timestamp": "2026-01-02T10:00:00Z",
"level": "INFO",
"service": "api",
"event": "document_updated",
"document_id": "doc-001",
"user_id": "user-123",
"blocks_changed": 5,
"duration_ms": 234
}
Pros: Simpler (no Git complexity)
Cons: Less portable, harder to backup, no external version control
Pros: Maximum portability
Cons: No real-time collaboration, search requires indexing
Pros: Feature-rich
Cons: Overkill, vendor lock-in
Chosen: Hybrid (Git + DB) ✅
- Best of both worlds
- Git for versioning + portability
- DB for performance + collaboration
Конец документа