CSES Solutions - Substring Order II

Last Updated : 17 Jun, 2024
Comments
Improve
Suggest changes
Like Article
Like
Report

You are given a string of length n. If all of its substrings (not necessarily distinct) are ordered lexicographically, what is the kth smallest of them?

Examples:

Input: String = "agctagctagct" , k = 7
Output: AGC
Explanation: The 7 smallest substrings in order are a, a, a, ag, ag, ag and agc.

Input:  String = "bacbaa" , k = 5
Output: ac
Explanation:  The 5 smallest substrings in order are a, a, a, aa and ac.

Approach: To solve the problem, follow the below idea:

The problem can be solved by constructing the suffix array and longest common prefix (LCP) array of the input string. Then, we build a segment tree to efficiently count the number of substrings lexicographically smaller than or equal to a given substring. Finally, we iterate through the suffix array in reverse order, updating the segment tree with substring counts and using binary search to find the longest common prefix that satisfies the condition of having at least k substrings lexicographically smaller than or equal to the current substring, thereby identifying the kth smallest substring.

Step-by-step algorithm:

  • Construct the suffix array of the given string S.
  • Build the longest common prefix (LCP) array using the constructed suffix array.
  • For each suffix in the suffix array, calculate the number of substrings starting from that suffix by updating a segment tree.
  • Starting from the last suffix in the suffix array, iterate through each suffix:
    • Update the segment tree with the count of substrings starting from the current suffix.
    • Use binary search to find the longest common prefix that satisfies the condition of having at least k substrings lexicographically smaller than or equal to the current substring.
    • Once the longest common prefix is found, print the substring and terminate the loop.

Below is the implementation of the algorithm:

C++
#include <bits/stdc++.h>
using namespace std;

// Define int as long long data type
#define int long long
// Define endl as newline character
#define endl '\n'
// Alias for first element in a pair
#define F first
// Alias for second element in a pair
#define S second

// Define a constant for maximum size
const int mxN = 1e5 + 5;
// Arrays for suffix array, position, temporary, and longest
// common prefix
int sa[mxN], pos[mxN], tmp[mxN], lcp[mxN];
// Variables for gap and size of the string
int gap, N;
// Input string
string S;

// Function to compare two positions in the suffix array

bool comp(int x, int y)
{
    // Compare positions based on their values in the pos
    // array
    if (pos[x] != pos[y])
        return pos[x] < pos[y];
    // If pos[x] == pos[y], compare their positions with a
    // gap
    x += gap;
    y += gap;
    // If within bounds, return comparison result based on
    // pos array; otherwise, return comparison of x and y
    return (x < N && y < N) ? pos[x] < pos[y] : x > y;
}

// Function to construct the suffix array
void suffix()
{
    // Initialize sa and pos arrays with initial values
    for (int i = 0; i < N; i++)
        sa[i] = i, pos[i] = S[i];

    // Loop for different gap values
    for (gap = 1;; gap <<= 1) {
        // Sort the suffix array based on the comparison
        // function comp
        sort(sa, sa + N, comp);
        // Update temporary array tmp
        for (int i = 0; i < N - 1; i++)
            tmp[i + 1] = tmp[i] + comp(sa[i], sa[i + 1]);
        // Update pos array
        for (int i = 0; i < N; i++)
            pos[sa[i]] = tmp[i];
        // If all suffixes are sorted, break the loop
        if (tmp[N - 1] == N - 1)
            break;
    }
}

// Function to construct the longest common prefix array
void build_lcp()
{
    for (int i = 0, k = 0; i < N; i++)
        if (pos[i] != N - 1) {
            int j = sa[pos[i] + 1];
            while (S[i + k] == S[j + k])
                k++;
            lcp[pos[i]] = k;
            if (k)
                k--;
        }
}

// Segment tree related functions for efficient range
// queries and updates Segment tree array
pair<int, int> seg[mxN * 10];
// Array to mark updates
int mark[mxN * 10];

// Function to push updates down the segment tree
void push(int k)
{
    if (mark[k]) {
        mark[k] = 0;
        seg[2 * k].F = seg[2 * k + 1].F = seg[k].F / 2;
        seg[2 * k].S = seg[2 * k + 1].S = 0;
        mark[2 * k] = mark[2 * k + 1] = 1;
    }
}

// Function to update values in the segment tree
void update(int v, int a, int b, int k, int x, int y)
{
    if (b < x || a > y)
        return;
    if (a <= x && b >= y) {
        seg[k].S += v;
        return;
    }
    int h = min(b, y) - max(a, x) + 1;
    push(k);
    seg[k].F += h * v;
    int d = (x + y) / 2;
    update(v, a, b, 2 * k, x, d);
    update(v, a, b, 2 * k + 1, d + 1, y);
}

// Function to assign values in the segment tree
int assign(int v, int a, int b, int k, int x, int y)
{
    if (b < x || a > y)
        return seg[k].F + (y - x + 1) * seg[k].S;
    if (a <= x && b >= y) {
        seg[k].F = (y - x + 1) * v;
        seg[k].S = 0;
        mark[k] = 1;
        return seg[k].F;
    }
    push(k);
    int d = (x + y) / 2;
    seg[2 * k].S += seg[k].S, seg[2 * k + 1].S += seg[k].S;
    seg[k].S = 0;
    seg[k].F = assign(v, a, b, 2 * k, x, d)
               + assign(v, a, b, 2 * k + 1, d + 1, y);
    return seg[k].F;
}

// Function to calculate sum in a range using segment tree
int sum(int a, int b, int k, int x, int y)
{
    if (b < x || a > y)
        return 0;
    if (a <= x && b >= y) {
        return seg[k].F + (y - x + 1) * seg[k].S;
    }
    push(k);
    seg[k].F += (y - x + 1) * seg[k].S;
    seg[2 * k].S += seg[k].S, seg[2 * k + 1].S += seg[k].S;
    seg[k].S = 0;
    int d = (x + y) / 2;
    return sum(a, b, 2 * k, x, d)
           + sum(a, b, 2 * k + 1, d + 1, y);
}

signed main()
{

    // Input string
    cin >> S;
    // Size of the string
    N = S.size();
    // Construct suffix array
    suffix();
    // Construct longest common prefix array
    build_lcp();
    // Given k value for substring extraction
    int k;
      cin >> k;

    // Calculate the k-th smallest lexicographically suffix
    // Calculate the position from the last
    k = N * (N + 1) / 2 - k + 1;

    // Construct a segment tree
    // Calculate the size of the segment tree array
    int K = 1 << (int)ceil(log2(N + 1));
    // Initialize current position
    int cur = 0;
    for (int i = N - 1; i >= 0; i--) {
        // Update the segment tree
        update(1, 1, N - sa[i], 1, 0, K - 1);
        // Get the previous longest common prefix
        int prev = (i ? lcp[i - 1] : 0);
        // Set the search range for the binary search
        int l = prev + 1, r = N - sa[i];
        // Initialize answer variable
        int ans = -1;
        // Binary search to find the longest common prefix
        // that satisfies the condition
        while (l <= r) {
            int m = l + (r - l) / 2;
            if (cur + sum(m, N - sa[i], 1, 0, K - 1) >= k) {
                ans = m;
                l = m + 1;
            }
            else
                r = m - 1;
        }
        // If answer is found, print the substring and exit
        // the loop
        if (ans != -1) {
            return cout << S.substr(sa[i], ans), 0;
        }
        // Update the current position and mark the segment
        // tree
        cur += sum(prev + 1, N - sa[i], 1, 0, K - 1);
        assign(0, prev + 1, N - sa[i], 1, 0, K - 1);
    }
}
Python
import math

# Define a constant for maximum size
mxN = 10**5 + 5
# Arrays for suffix array, position, temporary, and longest common prefix
sa = [0] * mxN
pos = [0] * mxN
tmp = [0] * mxN
lcp = [0] * mxN
# Variables for gap and size of the string
gap = 0
N = 0
# Input string
S = ""

# Function to compare two positions in the suffix array
def comp(x, y):
    global pos, gap, N
    # Compare positions based on their values in the pos array
    if pos[x] != pos[y]:
        return pos[x] < pos[y]
    # If pos[x] == pos[y], compare their positions with a gap
    x += gap
    y += gap
    # If within bounds, return comparison result based on pos array;
    # otherwise, return comparison of x and y
    return pos[x] < pos[y] if (x < N and y < N) else x > y

# Function to construct the suffix array
def suffix():
    global sa, pos, tmp, gap, N
    # Initialize sa and pos arrays with initial values
    for i in range(N):
        sa[i] = i
        pos[i] = ord(S[i])

    # Loop for different gap values
    gap = 1
    while True:
        # Sort the suffix array based on the comparison function comp
        sa.sort(key=lambda x: (pos[x], pos[x + gap] if x + gap < N else -1))
        # Update temporary array tmp
        tmp[0] = 0
        for i in range(1, N):
            tmp[i] = tmp[i - 1] + comp(sa[i - 1], sa[i])
        # Update pos array
        for i in range(N):
            pos[sa[i]] = tmp[i]
        # If all suffixes are sorted, break the loop
        if tmp[N - 1] == N - 1:
            break
        gap *= 2

# Function to construct the longest common prefix array
def build_lcp():
    global lcp, pos, N
    k = 0
    for i in range(N):
        if pos[i] != N - 1:
            j = sa[pos[i] + 1]
            while i + k < N and j + k < N and S[i + k] == S[j + k]:
                k += 1
            lcp[pos[i]] = k
            if k > 0:
                k -= 1

# Segment tree related functions for efficient range queries and updates Segment tree array
seg = [(0, 0)] * (mxN * 10)
# Array to mark updates
mark = [0] * (mxN * 10)

# Function to push updates down the segment tree
def push(k):
    global seg, mark
    if mark[k]:
        mark[k] = 0
        seg[2 * k] = seg[2 * k + 1] = (seg[k][0] // 2, 0)
        mark[2 * k] = mark[2 * k + 1] = 1

# Function to update values in the segment tree
def update(v, a, b, k, x, y):
    global seg
    if b < x or a > y:
        return
    if a <= x and b >= y:
        seg[k] = (seg[k][0], seg[k][1] + v)
        return
    h = min(b, y) - max(a, x) + 1
    push(k)
    seg[k] = (seg[k][0] + h * v, seg[k][1])
    d = (x + y) // 2
    update(v, a, b, 2 * k, x, d)
    update(v, a, b, 2 * k + 1, d + 1, y)

# Function to assign values in the segment tree
def assign(v, a, b, k, x, y):
    global seg, mark
    if b < x or a > y:
        return seg[k][0] + (y - x + 1) * seg[k][1]
    if a <= x and b >= y:
        seg[k] = ((y - x + 1) * v, 0)
        mark[k] = 1
        return seg[k][0]
    push(k)
    d = (x + y) // 2
    seg[2 * k] = (seg[k][0], seg[2 * k][1] + seg[k][1])
    seg[2 * k + 1] = (seg[k][0], seg[2 * k + 1][1] + seg[k][1])
    seg[k] = assign(v, a, b, 2 * k, x, d) + assign(v, a, b, 2 * k + 1, d + 1, y)
    return seg[k][0]

# Function to calculate sum in a range using segment tree
def _sum(a, b, k, x, y):
    global seg
    if b < x or a > y:
        return 0
    if a <= x and b >= y:
        return seg[k][0] + (y - x + 1) * seg[k][1]
    push(k)
    seg[k] = (seg[k][0] + (y - x + 1) * seg[k][1], seg[k][1])
    seg[2 * k] = (seg[2 * k][0], seg[2 * k][1] + seg[k][1])
    seg[2 * k + 1] = (seg[2 * k + 1][0], seg[2 * k + 1][1] + seg[k][1])
    d = (x + y) // 2
    return _sum(a, b, 2 * k, x, d) + _sum(a, b, 2 * k + 1, d + 1, y)

# Input string
S = input().strip()
# Size of the string
N = len(S)
# Construct suffix array
suffix()
# Construct longest common prefix array
build_lcp()
# Given k value for substring extraction
k = int(input())

# Calculate the k-th smallest lexicographically suffix
# Calculate the position from the last
k = N * (N + 1) // 2 - k + 1

# Construct a segment tree
# Calculate the size of the segment tree array
K = 1 << (math.ceil(math.log2(N + 1)))
# Initialize current position
cur = 0
for i in range(N - 1, -1, -1):
    # Update the segment tree
    update(1, 1, N - sa[i], 1, 0, K - 1)
    # Get the previous longest common prefix
    prev = lcp[i - 1] if i > 0 else 0
    # Set the search range for the binary search
    l, r = prev + 1, N - sa[i]
    # Initialize answer variable
    ans = -1
    # Binary search to find the longest common prefix that satisfies the condition
    while l <= r:
        m = (l + r) // 2
        if cur + _sum(m, N - sa[i], 1, 0, K - 1) >= k:
            ans = m
            r = m - 1
        else:
            l = m + 1
    # If answer is found, print the substring and exit the loop
    if ans != -1:
        print(S[sa[i]:sa[i] + ans])
        break
    # Update the current position and mark the segment tree
    cur += _sum(prev + 1, N - sa[i], 1, 0, K - 1)
    assign(0, prev + 1, N - sa[i], 1, 0, K - 1)
JavaScript
const mxN = 1e5 + 5;
let sa = new Array(mxN), pos = new Array(mxN), tmp = new Array(mxN), 
lcp = new Array(mxN);
let gap, N;
let S;

// Function to compare two positions in the suffix array
function comp(x, y) {
    if (pos[x] !== pos[y]) return pos[x] < pos[y];
    x += gap;
    y += gap;
    return (x < N && y < N) ? pos[x] < pos[y] : x > y;
}

// Function to construct the suffix array
function suffix() {
    for (let i = 0; i < N; i++) {
        sa[i] = i;
        pos[i] = S.charCodeAt(i);
    }

    for (gap = 1; ; gap <<= 1) {
        sa.sort((a, b) => comp(a, b) ? -1 : 1);

        tmp[0] = 0;
        for (let i = 0; i < N - 1; i++) {
            tmp[i + 1] = tmp[i] + (comp(sa[i], sa[i + 1]) ? 1 : 0);
        }

        for (let i = 0; i < N; i++) {
            pos[sa[i]] = tmp[i];
        }

        if (tmp[N - 1] === N - 1) break;
    }
}

// Function to construct the longest common prefix array
function build_lcp() {
        for (let i = 0, k = 0; i < N; i++) {
        if (pos[i] !== N - 1) {
            let j = sa[pos[i] + 1];
            while (i + k < N && j + k < N && S[i + k] === S[j + k]) k++;
            lcp[pos[i]] = k;
            if (k) k--;
        }
    }
}

// Segment tree related functions for efficient range queries and updates
let seg = new Array(mxN * 10).fill([0, 0]);
let mark = new Array(mxN * 10).fill(0);

// Function to push updates down the segment tree
function push(k) {
    if (mark[k]) {
        mark[k] = 0;
        seg[2 * k] = [seg[k][0] / 2, 0];
        seg[2 * k + 1] = [seg[k][0] / 2, 0];
        mark[2 * k] = mark[2 * k + 1] = 1;
    }
}

// Function to update values in the segment tree
function update(v, a, b, k, x, y) {
    if (b < x || a > y) return;
    if (a <= x && b >= y) {
        seg[k][1] += v;
        return;
    }
    let h = Math.min(b, y) - Math.max(a, x) + 1;
    push(k);
    seg[k][0] += h * v;
    let d = Math.floor((x + y) / 2);
    update(v, a, b, 2 * k, x, d);
    update(v, a, b, 2 * k + 1, d + 1, y);
}

// Function to assign values in the segment tree
function assign(v, a, b, k, x, y) {
    if (b < x || a > y) return seg[k][0] + (y - x + 1) * seg[k][1];
    if (a <= x && b >= y) {
        seg[k][0] = (y - x + 1) * v;
        seg[k][1] = 0;
        mark[k] = 1;
        return seg[k][0];
    }
    push(k);
    let d = Math.floor((x + y) / 2);
    seg[2 * k][1] += seg[k][1];
    seg[2 * k + 1][1] += seg[k][1];
    seg[k][1] = 0;
    seg[k][0] = assign(v, a, b, 2 * k, x, d) + 
    assign(v, a, b, 2 * k + 1, d + 1, y);
    return seg[k][0];
}

// Function to calculate sum in a range using segment tree
function sum(a, b, k, x, y) {
    if (b < x || a > y) return 0;
    if (a <= x && b >= y) return seg[k][0] + (y - x + 1) * seg[k][1];
    push(k);
    seg[k][0] += (y - x + 1) * seg[k][1];
    seg[2 * k][1] += seg[k][1];
    seg[2 * k + 1][1] += seg[k][1];
    seg[k][1] = 0;
    let d = Math.floor((x + y) / 2);
    return sum(a, b, 2 * k, x, d) + sum(a, b, 2 * k + 1, d + 1, y);
}

function main() {
    // Input string
    S = prompt("Enter the string:");
    // Size of the string
    N = S.length;
    // Construct suffix array
    suffix();
    // Construct longest common prefix array
    build_lcp();
    // Given k value for substring extraction
    let k = parseInt(prompt("Enter the value of k:"));

    // Calculate the k-th smallest lexicographically suffix
    k = N * (N + 1) / 2 - k + 1;

    // Construct a segment tree
    let K = 1 << Math.ceil(Math.log2(N + 1));
    let cur = 0;
    for (let i = N - 1; i >= 0; i--) {
        // Update the segment tree
        update(1, 1, N - sa[i], 1, 0, K - 1);
        // Get the previous longest common prefix
        let prev = (i ? lcp[i - 1] : 0);
        // Set the search range for the binary search
        let l = prev + 1, r = N - sa[i];
        // Initialize answer variable
        let ans = -1;
        // Binary search to find the longest common prefix that 
        // satisfies the condition
        while (l <= r) {
            let m = l + Math.floor((r - l) / 2);
            if (cur + sum(m, N - sa[i], 1, 0, K - 1) >= k) {
                ans = m;
                l = m + 1;
            } else {
                r = m - 1;
            }
        }
        // If answer is found, print the substring and exit the loop
        if (ans !== -1) {
            console.log(S.substring(sa[i], sa[i] + ans));
            return;
        }
        // Update the current position and mark the segment tree
        cur += sum(prev + 1, N - sa[i], 1, 0, K - 1);
        assign(0, prev + 1, N - sa[i], 1, 0, K - 1);
    }
}

main();

// This code is contributed by Shivam Gupta

Output
AGC

Time Complexity: O(N log N ), where N is the length of the input string.
Auxiliary Space: O(N), where N is the length of the input string.


Next Article
Practice Tags :

Similar Reads