graph TD
%% Input stage: four context words, each shown as a V-dimensional one-hot vector.
subgraph Input["Input Layer: One-Hot Encoded Context Words"]
    A1["Context Word 1<br/>'the'<br/>[0,0,0,1,0,...,0]<br/>V-dimensional"]
    A2["Context Word 2<br/>'economy'<br/>[0,0,0,0,1,...,0]<br/>V-dimensional"]
    A3["Context Word 3<br/>'strong'<br/>[0,1,0,0,0,...,0]<br/>V-dimensional"]
    A4["Context Word 4<br/>'and'<br/>[0,0,1,0,0,...,0]<br/>V-dimensional"]
end
%% Input-side weight matrix W_in, V rows by N columns, one row per vocabulary word.
subgraph Weights1["Weight Matrix W_in (V × N)"]
    W1["Each row = input embedding<br/>for one word<br/>Dimensions: V × N<br/>(V = vocab size, N = embedding dim)"]
end

%% One edge per context word: multiplying W_in^T by a one-hot x yields that word's vector v.
A1 -->|"v₁ = W_in^T × x₁"| W1
A2 -->|"v₂ = W_in^T × x₂"| W1
A3 -->|"v₃ = W_in^T × x₃"| W1
A4 -->|"v₄ = W_in^T × x₄"| W1
%% Hidden stage: the context vectors are averaged into a single N-dimensional vector h.
subgraph Hidden["Hidden Layer: Average Context Vector"]
    H["h = (1/C) Σ vᵢ<br/>Average of context embeddings<br/>N-dimensional vector<br/>(No activation function)"]
end

W1 -->|"Extract & average<br/>word vectors"| H
%% Output-side weight matrix W_out, N rows by V columns, one column per vocabulary word.
subgraph Weights2["Weight Matrix W_out (N × V)"]
    W2["Each column = output embedding<br/>for one word<br/>Dimensions: N × V"]
end

H -->|"u = W_out^T × h"| W2

%% Score stage: one raw score per vocabulary word, before normalisation.
subgraph Scores["Score Layer"]
    S["u_w for each word w<br/>V scores (one per vocab word)<br/>u_w = W_out[:,w]^T × h"]
end

W2 --> S
%% Softmax stage: scores are normalised into a probability distribution over the vocabulary.
subgraph Softmax["Softmax Layer"]
    SM["P(w|context) = exp(u_w) / Σ exp(u_w')<br/>Convert scores to probabilities<br/>All probabilities sum to 1"]
end

S --> SM

%% Output stage: example probabilities for a few candidate words; the shown values sum to 1.
subgraph Output["Output: Predicted Word Probabilities"]
    O1["P('is') = 0.45"]
    O2["P('very') = 0.20"]
    O3["P('remains') = 0.15"]
    O4["P('...') = 0.20"]
end

%% The softmax node fans out to every example output node.
SM --> O1 & O2 & O3 & O4
%% Loss stage: negative log-likelihood of the target word.
subgraph Loss["Loss Function"]
    L["L = -log P(w_target|context)<br/>= -u_w_target + log Σ exp(u_w)<br/><br/>Minimize negative log likelihood<br/>of correct target word"]
end

%% Dotted edge: the probability node O1 is the one compared against the actual target.
O1 -.->|"Compare to<br/>actual target"| L
%% Gradient stage: derivatives of the loss with respect to both weight matrices.
%% G1 is the gradient for one column of W_out; G2 is the gradient for the whole of W_in.
%% In G2 the outer sum runs over context words c and the inner sum over vocabulary words w;
%% the indices are spelled out in the label because the two sums are nested.
subgraph Gradient["Gradient Computation"]
    G1["∂L/∂W_out[:,w] = (P(w|context) - y_w) × h<br/>y_w = 1 if w=target, else 0"]
    G2["∂L/∂W_in = (1/C) Σ_c x_c × (Σ_w (P(w|context) - y_w) × W_out[:,w])^T<br/>c = context words, w = vocabulary words"]
end

L --> G1
L --> G2
%% Update stage: a gradient-descent step applied to the weights.
subgraph Update["Weight Update (Gradient Descent)"]
    U["W_new = W_old - η × ∂L/∂W<br/>η = learning rate (typically 0.01-0.025)<br/><br/>Repeat for all training examples"]
end

%% Both gradient nodes feed the update rule.
G1 & G2 --> U

%% Dotted feedback edges: the updated weights flow back into both matrices.
U -.->|"Update weights<br/>iteratively"| W1
U -.->|"Update weights<br/>iteratively"| W2
%% Subgraph background colours, listed in pipeline order.
%% Every stage gets its own fill so that no subgraph falls back to the theme default.
style Input fill:#e1f5ff
style Weights1 fill:#e0e0e0
style Hidden fill:#fff4e1
style Weights2 fill:#e0e0e0
style Scores fill:#e0f2f1
style Softmax fill:#e8eaf6
style Output fill:#e8f5e9
style Loss fill:#ffebee
style Gradient fill:#f3e5f5
style Update fill:#fff9c4