/* A backtracking algorithm for studying rich words that avoid specified powers

## Overview
This algorithms implements a backtracking search which searches for the lexicographically least rich word of length $L$ avoiding $r$-powers (or $r+$-powers) over an alphabet $\\{0, 1, \ldots, k-1\\}$ for a given rational number $r$ expressed as a ratio $p/q$ (if it exists). Testing for richness is implemented efficiently using the Eertree data structure with quick links [[1]](#1).

## Compilation

    g++ backtrack.cpp -o backtrack

## Usage

    ./backtrack L k p q prefix forbidden_factors

For example,

    ./backtrack 1000 3 2 1

searches for a rich word of length $1000$ over a ternary alphabet that avoids squares. The run of the program shows that such a word does not exist. The run shows that the longest and lexicographically least ternary word avoiding squares is $0102010$. Running

    ./backtrack 1000 3 2+ 1

attempts to find a $3$-letter rich word of length $1000$ that avoids $2+$-powers. The longest found word is of length $78$. The parameter prefix specifies that only words with this given prefix are searched for. A list of comma-seperated forbidden factors can also be given. For example

    ./backtrack 1000 3 7 3 012 00,22

searches for ternary words that are rich, avoid 7/3 powers, begin with 012, and do not contain the factor 00 or 22. The longest such word has length 18.

## References
<a id="1">[1]</a> 
M. Rubinchik, A. M. Shur. Eertree: An efficient data structure for processing palindromes in strings (2015) [arXiv:1506.04862](http://arxiv.org/abs/1506.04862).  */

/* MIT License

Copyright (c) 2024 Jarkko Peltomäki

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE. */

/* eertree.h */

#ifndef H_EERTREE
#define H_EERTREE

#include <cassert>
#include <string>
#include <vector>
using namespace std;

#define INT2CHAR(i) i + '0'
#define CHAR2INT(c) c - '0'

/*
 * Node in the Eertree that represents a palindrome.
*/
class Node {
    public:
    // Number of letters.
    int letters;
    // Length of the palindrome.
    int length;
    // Length of the prefix that had this palindrome as a suffix for the first
    // time.
    int prefix_length;
    // The index of the Node that has an edge to this Node.
    int parent;
    // Outgoing edges that represent palindromes that have the current
    // palindrome as a center.
    int* edges;
    // Suffix link that holds the index of the Node that corresponds to the LPS
    // of the current palindrome. Index 1 is the empty palindrome.
    int suffix_link;
    // The quick link that holds the index of the Node that corresponds to the
    // LPS of the current palindrome that is preceded by a letter different
    // from the letter that precedes the LPS of the current palindrome.
    int quick_link;

    Node();
    Node(int letters, int length, int prefix_length, int parent, int suffix_link, int quick_link);
    Node(const Node& N);
    ~Node();
};

class EerTree {

    private:
    int letters;
    vector<Node> tree;
    // List of indices to Nodes that indicate the longest palindromic suffix of
    // each prefix of the word. This is used to implement the pop method.
    vector<int> previous_lps;
    // Index to the Node that corresponds to the current longest palindromic
    // suffix.
    int max_pal_suf;

    public:
    string word;

    EerTree();
    EerTree(int letters, string word);
    int longest_pal_suffix_preceding(int node_idx, char a);
    void add(char a);
    void pop();
    bool is_rich();
};

#endif

/* eertree.cpp */

Node::Node() {
}

Node::Node(int letters, int length, int prefix_length, int parent, int suffix_link, int quick_link) {
    this->letters = letters;
    this->length = length;
    this->prefix_length = prefix_length;
    this->parent = parent;
    this->suffix_link = suffix_link;
    this->quick_link = quick_link;
    this->edges = (int*) malloc(this->letters*sizeof(int));
    for (int i = 0; i < this->letters; i++) {
        this->edges[i] = -1;
    }
}

Node::Node(const Node& N) {
    this->letters = N.letters;
    this->length = N.length;
    this->prefix_length = N.prefix_length;
    this->parent = N.parent;
    this->suffix_link = N.suffix_link;
    this->quick_link = N.quick_link;
    this->edges = (int*) malloc(this->letters*sizeof(int));
    for (int i = 0; i < this->letters; i++) {
        this->edges[i] = N.edges[i];
    }
}

Node::~Node() {
    free(this->edges);
}

EerTree::EerTree() {
}

EerTree::EerTree(int letters, string word) {
    this->letters = letters;
    // Setup the tree with the two initial special nodes.
    this->tree = {Node(letters, -1, -1, -1, 0, 0), Node(letters, 0, 0, 0, 0, 0)};
    this->max_pal_suf = 1;

    for (char a : word) {
        this->add(a);
    }
}

/*
 * Finds the longest proper palindromic suffix preceded by the letter a of the
 * palindrome corresponding to the given node index. The return value is a node
 * index, and equals 0 (special node -1) if not found.
*/
int EerTree::longest_pal_suffix_preceding(int node_idx, char a) {
    int current_idx = node_idx;
    int prev_idx = current_idx;
    // Notice that 0 is the index of the special node -1.
    while (current_idx != 0) {
        int b = this->word.length() - 1 - this->tree[current_idx].length;
        if (b >= 0 && this->word[b] == a) {
            break;
        }
        prev_idx = current_idx;
        current_idx = this->tree[current_idx].suffix_link;
        if (current_idx == 0) break;
        b = this->word.length() - 1 - this->tree[current_idx].length;
        if (b >= 0 && this->word[b] == a) {
            break;
        }
        current_idx = this->tree[prev_idx].quick_link;
    }
    return current_idx;
}

void EerTree::add(char a) {
    // Let the current string be T.
    int current_pal_suffix = this->max_pal_suf;

    // Search for the longest palindrome suffix that is preceded by the letter
    // a or get the index of the special node -1 if it does not exist.
    int pal_suffix_idx = this->longest_pal_suffix_preceding(this->max_pal_suf, a);
    Node& pal_suffix = this->tree[pal_suffix_idx];

    // Check if the found node has an outgoing edge with the letter a.
    if (pal_suffix.edges[CHAR2INT(a)] != -1) {
        // Found the edge, so no new palindrome was created. Nothing needs to
        // be done except to add the letter a to the read word and to update
        // the longest palindromic suffix.
        this->previous_lps.push_back(this->max_pal_suf);
        this->max_pal_suf = pal_suffix.edges[CHAR2INT(a)];
        this->word.push_back(a);
        return;
    }

    // The current palindromic suffix surrounded by the new letter a forms a
    // novel palindromic suffix or we have a new letter. We need to create a
    // new Node for the palindrome and add it to the tree.
    int new_pal_length = pal_suffix.length + 2;
    // The default values correspond to the case where we are adding a new
    // letter, and the suffix link will point to the special node 0 (with index
    // 1) and the edges will be added to the special node -1 (with index 0).
    int suffix_link_target_idx = 1;
    int edge_add_idx = 0;
    if (new_pal_length > 1) {
        // The new palindrome is not a letter, and we need to search for its
        // longest palindromic proper suffix that is preceded by the letter a.
        //
        // This is done by taking the suffix link and searching for a
        // palindrome that is preceded by a by continuing the traversing until
        // the special node 0 is encountered. Then the edge corresponding to
        // the letter is traversed to find the suffix link target. This edge
        // always exists.
        int suffix_link_suffix_idx = this->longest_pal_suffix_preceding(pal_suffix.suffix_link, a);
        suffix_link_target_idx = this->tree[suffix_link_suffix_idx].edges[CHAR2INT(a)];
        edge_add_idx = pal_suffix_idx;
    }

    // Figure out the quick link.
    int quick_link_target_idx;
    // This is the position of the letter that precedes the LPS of the current
    // palindrome.
    int b = this->word.length() - this->tree[suffix_link_target_idx].length;
    if (b >= this->word.length()) {
        // We get here only when the new palindrome is a letter. We point the
        // quick link to the special node 0 (with index 1) for correct
        // behavior.
        quick_link_target_idx = 1;
    }
    else {
      // This is the position letter that precedes the LPS of the LPS of the
      // current palindrome.
      int c = this->word.length() - this->tree[this->tree[suffix_link_target_idx].suffix_link].length;
      if (c >= this->word.length()) {
          // We get here only when the LPS is a letter. We point the quick link
          // to the special node 0 (with index 1) for correct behavior.
          quick_link_target_idx = 1;
      }
      else {
          if (this->word[b] == this->word[c]) {
              quick_link_target_idx = this->tree[suffix_link_target_idx].quick_link;
          }
          else {
              quick_link_target_idx = this->tree[suffix_link_target_idx].suffix_link;
          }
      }
    }

    // Add a new Node to the tree, update edges, longest palindromic suffix,
    // and the word read so far.
    this->tree.push_back(Node(this->letters, new_pal_length, this->word.length() + 1, edge_add_idx, suffix_link_target_idx, quick_link_target_idx));
    this->tree[edge_add_idx].edges[CHAR2INT(a)] = this->tree.size() - 1;
    this->previous_lps.push_back(this->max_pal_suf);
    this->max_pal_suf = this->tree.size() - 1;
    this->word.push_back(a);
}

/*
 * Remove the last letter of the current word and restore the state of EerTree
 * to that situation.
 */
void EerTree::pop() {
    if (this->word.length() == 0) return;

    // Check if the previous step added a new palindrome or not.
    if (this->tree[this->max_pal_suf].prefix_length == this->word.length()) {
        // A new palindrome was added.
        //
        // We need to find its parent and remove the edge to the current Node.
        int parent = this->tree[this->max_pal_suf].parent;
        this->tree[parent].edges[CHAR2INT(this->word.back())] = -1;
        // Remove the Node from the tree.
        this->tree.pop_back();
    }

    // Remove last letter of the read word.
    this->word.pop_back();
    // Restore previous longest palindromic suffix.
    this->max_pal_suf = this->previous_lps.back();
    this->previous_lps.pop_back();
}

bool EerTree::is_rich() {
    return this->tree.size() - 2 == this->word.length();
}

/* suffix_tree.h */

#ifndef H_SUFFIXTREE
#define H_SUFFIXTREE

#include <string>
using namespace std;

#define INT2CHAR(i) i + '0'
#define CHAR2INT(c) c - '0'

/*
 * Node of a suffix tree.
*/
class SuffixNode {
    public:
    // Number of letters.
    int letters;
    bool is_terminal;
    // Outgoing edges that represent continuations of the current suffix.
    SuffixNode **edges;

    SuffixNode();
    SuffixNode(int letters);
    ~SuffixNode();
};

/*
 * A suffix tree that can be used to check if a given word has a suffix that is
 * among the words that make up the tree.
 */
class SuffixTree {
    private:
    int letters;
    SuffixNode *root;

    public:
    SuffixTree();
    SuffixTree(int letters);
    void add(const string& w);
    bool has_suffix(const string& w);
};

#endif

/* suffix_tree.cpp */

SuffixNode::SuffixNode() {
}

SuffixNode::SuffixNode(int letters) {
    this->letters = letters;
    this->is_terminal = false;
    this->edges = (SuffixNode**) malloc(this->letters*sizeof(SuffixNode*));
    for (int i = 0; i < letters; i++) {
        this->edges[i] = nullptr;
    }
}

SuffixNode::~SuffixNode() {
    free(this->edges);
}

SuffixTree::SuffixTree() {
}

SuffixTree::SuffixTree(int letters) {
    this->letters = letters;
    this->root = new SuffixNode(letters);
}

void SuffixTree::add(const string& w) {
    SuffixNode *current = this->root;
    for (int i = w.length() - 1; i >= 0; i--) {
        if (current->edges[CHAR2INT(w[i])] != nullptr) {
            current = current->edges[CHAR2INT(w[i])];
        }
        else {
            SuffixNode *new_node = new SuffixNode(this->letters);
            current->edges[CHAR2INT(w[i])] = new_node;
            current = new_node;
        }
    }
    current->is_terminal = true;
}

bool SuffixTree::has_suffix(const string& w) {
    SuffixNode *current = this->root;
    for (int i = w.length() - 1; i >= 0; i--) {
        if (current->is_terminal) {
            return true;
        }
        if (current->edges[CHAR2INT(w[i])] == nullptr) {
            return false;
        }
        else {
            current = current->edges[CHAR2INT(w[i])];
        }
    }
    return current->is_terminal;
}


/* backtrack.cpp */

#include <iostream>
#include <string>
#include <sstream>

/*
 * Split a string according to a comma separator and return the pieces as a vector.
 */
vector<string> split(string w) {
    stringstream ss(w);
    vector<string> tokens = vector<string>();
    string token;
    while (getline(ss, token, ',')) {
        tokens.push_back(token);
    }
    return tokens;
}

/*
 * Check if the given string has a suffix that is a repetition with exponent
 * top/bottom.
 */
bool suffix_repetition(string& w, int top, int bottom) {
    int n = w.length();
    int p = 1;
    while (n*bottom >= p*top) {
        int i = 0;
        while (w[n - 1 - i] == w[n - 1 - i - p]) {
            if ((i + 1 + p)*bottom >= p*top) {
                return true;
            }
            i++;
            if (i + p >= n) {
                break;
            }
        }
        p++;
    }
    return false;
}

/*
 * Check if the given string has a suffix that is a repetition with exponent
 * strictly greater than top/bottom.
 */
bool suffix_repetition_plus(string& w, int top, int bottom) {
    int n = w.length();
    int p = 1;
    while (n*bottom > p*top) {
        int i = 0;
        while (w[n - 1 - i] == w[n - 1 - i - p]) {
            if ((i + 1 + p)*bottom > p*top) {
                return true;
            }
            i++;
            if (i + p >= n) {
                break;
            }
        }
        p++;
    }
    return false;
}

string backtrack(bool (*suffix_condition)(string&, int, int), int max_length, int letters, int top, int bottom, string prefix = "", vector<string> forbidden_factors = vector<string>()) {
    char max_letter = INT2CHAR(letters - 1);
    int longest = prefix.length();
    EerTree eertree = EerTree(letters, prefix);
    SuffixTree suffix_tree = SuffixTree(letters);
    for (auto w : forbidden_factors) {
        suffix_tree.add(w);
    }
    char extension = '0';

    // Check that the given prefix does not contain any forbidden factors.
    for (int i = 1; i <= prefix.length(); i++) {
        string p = prefix.substr(0, i);
        if (suffix_tree.has_suffix(p)) {
            cout << "The given prefix contains a forbidden factor." << endl;
            exit(1);
        }
    }

    // Check that the given prefix is rich and does not satisfy the suffix
    // condition for all prefixes.
    if (not eertree.is_rich()) {
        cout << "The given prefix is not rich." << endl;
        exit(1);
    }
    for (int i = 1; i <= prefix.length(); i++) {
        string p = prefix.substr(0, i);
        if (suffix_condition(p, top, bottom)) {
            cout << "A prefix of the given prefix satisfies the suffix condition." << endl;
            exit(1);
        }
    }

    // This keeps track of the first appearance of each letter. This is used to
    // prune out isomorphic words. This functionality is disabled if there are
    // forbidden factors as this logic does then not work.
    vector<int> letter_positions = vector<int>(letters, -1);
    char max_allowed_letter = max_letter;
    if (forbidden_factors.size() == 0) {
        max_allowed_letter = '0';
        int filled = -1;
        for (int i = 0; i < prefix.length(); i++) {
            bool foo = prefix[i] > max_letter;
            if (prefix[i] > max_letter) {
                cout << "The given prefix has more than " << letters << " letters." << endl;
                exit(1);
            }
            if (letter_positions[CHAR2INT(prefix[i])] == -1) {
                if (CHAR2INT(prefix[i]) != filled + 1) {
                    cout << "The first occurrences of letters in the prefix must be in integer order." << endl;
                    exit(1);
                }
                letter_positions[CHAR2INT(prefix[i])] = i;
                max_allowed_letter++;
                filled++;
            }
        }
    }

    while (true) {
        eertree.add(extension);

        // Update the letter occurrences if the new letter does not previously
        // appear. This is disabled if there are forbidden factors.
        if (forbidden_factors.size() == 0 && letter_positions[CHAR2INT(extension)] == -1) {
            letter_positions[CHAR2INT(extension)] = eertree.word.length() - 1;
            max_allowed_letter++;
        }

        if (!suffix_tree.has_suffix(eertree.word) && eertree.is_rich() && !suffix_condition(eertree.word, top, bottom)) {
            // Adding the letter was successful.
            if (eertree.word.length() > longest) {
                longest = eertree.word.length();
                cout << longest << " " << eertree.word << endl;
            }
            if (eertree.word.length() == max_length) {
                return eertree.word;
            }
            extension = '0';
        }
        else {
            // Adding the letter was unsuccessful. Backtrack.
            bool allowed_to_increment = false;
            char a;
            while (!allowed_to_increment && eertree.word.length() - prefix.length() > 0) {
                a = eertree.word.back();
                eertree.pop();

                // Check if we happened to remove a letter completely.
                if (eertree.word.length() == letter_positions[CHAR2INT(max_allowed_letter - 1)]) {
                    letter_positions[CHAR2INT(max_allowed_letter - 1)] = -1;
                    max_allowed_letter--;
                }

                allowed_to_increment = a < min(max_letter, max_allowed_letter);
            }

            if (eertree.word.length() - prefix.length() <= 0 && a == min(max_letter, max_allowed_letter)) {
                // We backtracked to the beginning and must terminate.
                break;
            }
            else {
                // Increment the letter a to be the new extending letter.
                extension = a + 1;
            }
        }
    }
    return string("");
}

void help(string program) {
    cout << "Usage: " << program << " max_length n_letters top bottom [prefix] [forbidden_factors]" << endl;
    exit(1);
}

int main(int argc, char *argv[]) {
    if (argc < 5) {
        help(string(argv[0]));
    }
    auto max_length = atoi(argv[1]);
    auto letters = atoi(argv[2]);

    int top;
    int bottom;
    bool power_plus = false;
    string s_top = string(argv[3]);
    string s_bottom = string(argv[4]);
    if (s_top[s_top.length() - 1] == '+') {
        top = stoi(s_top.substr(0, s_top.length() - 1));
        power_plus = true;
    }
    else {
        top = stoi(s_top);
    }
    if (s_bottom[s_bottom.length() - 1] == '+') {
        bottom = stoi(s_bottom.substr(0, s_bottom.length() - 1));
        power_plus = true;
    }
    else {
        bottom = stoi(s_bottom);
    }

    string prefix;
    if (argc > 5) {
        prefix = string(argv[5]);
    }
    else {
        prefix = string("");
    }

    vector<string> forbidden_factors;
    if (argc > 6) {
        forbidden_factors = split(string(argv[6]));
    }
    else {
        forbidden_factors = vector<string>();
    }

    bool (*suffix_condition)(string&, int, int);
    if (power_plus) {
        suffix_condition = suffix_repetition_plus;
    }
    else {
        suffix_condition = suffix_repetition;
    }

    string found = backtrack(suffix_condition, max_length, letters, top, bottom, prefix, forbidden_factors);

    if (found.length() == 0) {
        cout << "The pattern is unavoidable!" << endl;
    }
    else {
        cout << "Found a word of length " << found.length() << endl;
    }

    return 0;
}