(Continues from Matrix #1)
1 Fibonacci Squared Subarray Sum
Problem. Given $a_1,\ldots,a_n$. Define:
$$f(S)=\sum_{T\subseteq S}\bigl(\mathrm{fib}(\textstyle\sum_{x\in T}x)\bigr)^2$$where $\mathrm{fib}(0)=0,\ \mathrm{fib}(1)=1,\ \mathrm{fib}(n)=\mathrm{fib}(n-1)+\mathrm{fib}(n-2)$.
Support:
- Point update $a_p\leftarrow v$.
- Query $\sum_{S=[a_i,\ldots,a_j]}f(S)$ over all contiguous subarrays $[i,j]\subseteq[l,r]$.
$(n,q\leq10^5,\ a_i\leq10^5)$, answer mod $998244353$.
Setup
Define the companion matrix $B=\begin{pmatrix}2&2&-1\\1&0&0\\0&1&0\end{pmatrix}$ so that $B^k$ encodes $(\mathrm{fib}(k)^2,\ \ldots)$. For each position $a_i$, let $M_i=I+B^{a_i}$ (a $3\times3$ matrix).
The key identity for combining adjacent intervals:
$$\text{given left interval }L\text{ and right interval }R,$$$$\text{prod}_{LR}=\text{prod}_L\cdot\text{prod}_R$$$$\text{pre}_{LR}=\text{pre}_L+\text{prod}_L\cdot\text{pre}_R$$$$\text{suf}_{LR}=\text{suf}_R+\text{suf}_L\cdot\text{prod}_R$$$$\text{sum}_{LR}=\text{sum}_L+\text{sum}_R+\text{suf}_L\cdot\text{pre}_R$$Each segment-tree node stores the 4-tuple $(\text{prod},\text{pre},\text{suf},\text{sum})$ — all $3\times3$ matrices. The final answer for a query is result.sum * v at index [0][0], where $v=[0,1,1]^\top$.
#include <bits/stdc++.h>
using namespace std;
using ll = long long;
const int MOD = 998244353;
struct Mat {
int a[3][3];
Mat(bool ident=false){
for(int i=0;i<3;i++) for(int j=0;j<3;j++) a[i][j]=(ident&&i==j)?1:0;
}
};
inline int addm(int x,int y){ int z=x+y; if(z>=MOD) z-=MOD; return z; }
inline int mulm(ll x,ll y){ return (int)(x*y%MOD); }
Mat operator+(const Mat& A,const Mat& B){
Mat C;
for(int i=0;i<3;i++) for(int j=0;j<3;j++) C.a[i][j]=addm(A.a[i][j],B.a[i][j]);
return C;
}
Mat operator*(const Mat& A,const Mat& B){
Mat C;
for(int i=0;i<3;i++) for(int k=0;k<3;k++) if(A.a[i][k])
for(int j=0;j<3;j++) C.a[i][j]=addm(C.a[i][j],mulm(A.a[i][k],B.a[k][j]));
return C;
}
Mat mpow(Mat A,int e){
Mat R(true);
while(e){ if(e&1) R=A*R; A=A*A; e>>=1; }
return R;
}
struct Node { int l,r; Mat prod,pre,suf,sum; };
const int N=100000+5;
int n,q,a[N];
Mat B,I(true),G; // companion matrix, identity, initial column vector
Node seg[N<<2];
inline void pull(Node& rt,const Node& L,const Node& R){
rt.prod=L.prod*R.prod;
rt.pre =L.pre +(L.prod*R.pre);
rt.suf =R.suf +(L.suf *R.prod);
rt.sum =L.sum + R.sum+(L.suf*R.pre);
}
inline void build(int u,int l,int r){
seg[u].l=l; seg[u].r=r;
if(l==r){
Mat M=I+mpow(B,a[l]); // leaf: I + B^{a_l}
seg[u].prod=seg[u].pre=seg[u].suf=seg[u].sum=M;
return;
}
int m=(l+r)>>1;
build(u<<1,l,m); build(u<<1|1,m+1,r);
pull(seg[u],seg[u<<1],seg[u<<1|1]);
}
inline void modify(int u,int p,int val){
if(seg[u].l==seg[u].r){
Mat M=I+mpow(B,val);
seg[u].prod=seg[u].pre=seg[u].suf=seg[u].sum=M;
return;
}
int m=(seg[u].l+seg[u].r)>>1;
if(p<=m) modify(u<<1,p,val); else modify(u<<1|1,p,val);
pull(seg[u],seg[u<<1],seg[u<<1|1]);
}
Node query(int u,int L,int R){
if(L<=seg[u].l&&seg[u].r<=R) return seg[u];
int m=(seg[u].l+seg[u].r)>>1;
if(R<=m) return query(u<<1,L,R);
if(L>m) return query(u<<1|1,L,R);
Node left=query(u<<1,L,R), right=query(u<<1|1,L,R);
Node res; pull(res,left,right); return res;
}
int main(){
ios::sync_with_stdio(false); cin.tie(nullptr);
// companion matrix B: [2 2 -1; 1 0 0; 0 1 0]
B=Mat();
B.a[0][0]=2; B.a[0][1]=2; B.a[0][2]=MOD-1;
B.a[1][0]=1; B.a[2][1]=1;
// initial vector G = [0,1,1]^T stored as a 3x1 "matrix"
G=Mat(); G.a[1][0]=1; G.a[2][0]=1;
cin>>n>>q;
for(int i=1;i<=n;i++) cin>>a[i];
build(1,1,n);
while(q--){
int op; cin>>op;
if(op==1){ int p,v; cin>>p>>v; a[p]=v; modify(1,p,v); }
else{
int l,r; cin>>l>>r;
Node res=query(1,l,r);
Mat ans=res.sum*G;
cout<<ans.a[0][0]<<'\n';
}
}
return 0;
}
2 Gaussian Elimination — Expected Random-Swap Sort
2.1 Problem — CodeChef CHEFONM
A permutation of $n$ elements. Each step: uniformly pick an unordered pair $\{i,j\}$ and swap them. Find the expected number of steps to sort the permutation (reach the identity). Output for all initial cycle types.
Total pairs: $T=\binom{n}{2}$.
2.2 State Space: Cycle-Type Partition
A permutation’s cycle structure is fully described by the multiset of cycle lengths (the “cycle type”). For example, a permutation of 6 elements might be in state $[3,2,1]$.
The number of distinct cycle types for $n$ elements equals $p(n)$ (partition function), which is small for small $n$ — this is why the approach is feasible.
2.3 One-Step Transitions
A) Split: swap within the same cycle of length $L$
Swapping two elements at “distance” $j$ (chord length $j$ in the cycle, $1\leq j\leq L-1$) splits the cycle into lengths $j$ and $L-j$.
Number of such unordered pairs in an $L$-cycle:
- $j\neq L/2$: exactly $L$ pairs.
- $j=L/2$ (only when $L$ even): exactly $L/2$ pairs.
For simplicity in implementation: iterate $j$ from 1 to $L-1$; for each $j$, add probability $\frac{L}{2T}$ to the transition splitting into $\{j, L-j\}$. This automatically accounts for the double-counting when $j=L-j$.
B) Merge: swap across two different cycles of lengths $a$ and $b$
The number of cross-pairs is $a\cdot b$, so the merging probability is $\frac{ab}{T}$.
2.4 Linear System via First-Step Analysis
Let $E[S]$ = expected steps to reach all-ones partition from state $S$.
$$E[S_{\text{ok}}]=0,\qquad E[S]=1+\sum_{S'}p(S\to S')\,E[S']$$Rearranging: $E[S]-\sum_{S'}p(S\to S')\,E[S']=1$.
In matrix form: $(I-Q)\mathbf{e}=\mathbf{1}$ where $Q_{S,S'}=p(S\to S')$. Solve by Gaussian elimination.
When to use Gaussian elimination vs. DP:
- DAG (no cycles): DP along reverse topological order, $O(V+E)$. Equivalently, back-substitution on a triangular system.
- Cyclic graph: states depend on each other in a cycle (e.g. partition $[2,1]\leftrightarrow[3]$ can transition both ways). Must solve the full linear system $(I-Q)\mathbf{e}=\mathbf{1}$, $O(K^3)$ Gaussian elimination.
- Hybrid: find SCCs, compress to a DAG; solve each SCC’s small system separately, propagate along DAG. Complexity: $\sum_iO(k_i^3)+O(E)$.
Rule of thumb: topological order when possible; linear equations only when cycles are unavoidable (and the state space is small enough after compression).
2.5 Implementation
#include <bits/stdc++.h>
constexpr int N = 10;
vector<double> anss[N + 1];
vector<double> gauss(vector<vector<double>> a, vector<double> b) {
int n = a.size();
for (int i = 0; i < n; ++i) {
double x = a[i][i];
for (int j = i; j < n; ++j) a[i][j] /= x;
b[i] /= x;
for (int j = 0; j < n; ++j) {
if (i == j) continue;
x = a[j][i];
for (int k = i; k < n; ++k) a[j][k] -= a[i][k] * x;
b[j] -= b[i] * x;
}
}
return b;
}
int main() {
ios::sync_with_stdio(false); cin.tie(nullptr);
cout << fixed << setprecision(7);
vector<vector<int>> partitions[N + 1];
for (int n = 1; n <= N; ++n) {
vector<int> partition;
// DFS generates partitions in ascending order
function<void(int,int)> dfs = [&](int rest, int last){
if (rest == 0){ partitions[n].push_back(partition); return; }
for (int i = 1; i <= last && i <= rest; ++i){
partition.push_back(i); dfs(rest-i, i); partition.pop_back();
}
};
dfs(n, n);
int cnt = partitions[n].size();
vector<vector<double>> a(cnt, vector<double>(cnt));
vector<double> b(cnt);
a[0][0] = 1; // terminal state: E = 0 → A[0][0]=1, b[0]=0
for (int x = 1; x < cnt; ++x) {
partition = partitions[n][x];
a[x][x] += 1; // diagonal: coefficient of E[S]
b[x] += 1; // RHS constant
int T = n * (n - 1) / 2;
// A) split: swap within a cycle of length partition[i]
for (int i = 0; i < (int)partition.size(); ++i) {
for (int j = 1; j < partition[i]; ++j) {
double prob = partition[i] * 0.5 / T;
auto newP(partition);
newP.erase(newP.begin() + i);
newP.push_back(j);
newP.push_back(partition[i] - j);
sort(newP.begin(), newP.end(), greater<>());
int idx = lower_bound(partitions[n].begin(), partitions[n].end(), newP)
- partitions[n].begin();
a[x][idx] -= prob;
}
}
// B) merge: swap across two different cycles
for (int i = 0; i < (int)partition.size(); ++i) {
for (int j = 0; j < i; ++j) {
double prob = partition[i] * partition[j] * 1.0 / T;
auto newP(partition);
newP.erase(newP.begin() + i);
newP.erase(newP.begin() + j);
newP.push_back(partition[i] + partition[j]);
sort(newP.begin(), newP.end(), greater<>());
int idx = lower_bound(partitions[n].begin(), partitions[n].end(), newP)
- partitions[n].begin();
a[x][idx] -= prob;
}
}
}
anss[n] = gauss(a, b);
}
int t; cin >> t;
while (t--) {
int n; cin >> n;
vector<int> a(n);
for (int i = 0; i < n; ++i){ cin >> a[i]; --a[i]; }
// find cycle type of the input permutation
vector<bool> vis(n);
vector<int> partition;
for (int i = 0; i < n; ++i) {
if (vis[i]) continue;
int len = 0;
for (int j = i; !vis[j]; j = a[j], ++len) vis[j] = true;
partition.push_back(len);
}
sort(partition.begin(), partition.end(), greater<>());
int idx = lower_bound(partitions[n].begin(), partitions[n].end(), partition)
- partitions[n].begin();
cout << anss[n][idx] << "\n";
}
return 0;
}