You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1677 lines
47 KiB
1677 lines
47 KiB
2 years ago
|
// Apache License
|
||
|
// Version 2.0, January 2004
|
||
|
// http://www.apache.org/licenses/
|
||
|
// https://github.com/alexst07/glob-cpp
|
||
|
|
||
|
#pragma once
|
||
|
|
||
|
#include <memory>
|
||
|
#include <string>
|
||
|
#include <tuple>
|
||
|
#include <vector>
|
||
|
|
||
|
namespace glob {
|
||
|
|
||
|
// Shorthand for the standard string type built on the character type charT.
template <class charT> using String = std::basic_string<charT>;
|
||
|
|
||
|
// Forward declaration so that State can refer to the Automata that owns it;
// the class itself is defined further down.
template <class charT> class Automata;
|
||
|
|
||
|
// Exception type used by this library to report invalid glob patterns
// (for example the lexer throws it for a trailing backslash).
class Error : public std::exception
{
public:
    // msg: human readable description, returned verbatim by what().
    Error(const std::string &msg) : msg_{msg} {}

    // Returns the message given at construction. The pointer stays valid for
    // the lifetime of this object.
    // `noexcept` replaces the dynamic exception specification `throw()`, which
    // is deprecated and no longer accepted by C++20 compilers.
    const char *what() const noexcept override { return msg_.c_str(); }

private:
    std::string msg_;
};
|
||
|
|
||
|
// Kind tag carried by every automata state (see State::Type()).
enum class StateType {
    MATCH,    // accepting state
    FAIL,     // rejecting state
    CHAR,     // single literal character
    QUESTION, // any single character
    MULT,     // star: any run of characters
    SET,      // character set
    GROUP,    // group of alternative sub-automatas
    UNION     // alternative separator
};
|
||
|
|
||
|
// Backport of std::exchange (C++14), adapted from cppreference.com.
//
// Stores new_value into obj and returns the value obj held before the call.
// The result is a plain (non-const) T: a const return value could not be moved
// from, which forces a copy for expensive types and makes assigning the result
// impossible for move-only types.
//
// NOTE: when an argument has a std:: type, an unqualified call can also find
// std::exchange through argument dependent lookup; qualify the call in that case.
template <class T, class U = T> T exchange(T &obj, U &&new_value)
{
    T old_value = std::move(obj);
    obj = std::forward<U>(new_value);
    return old_value;
}
|
||
|
|
||
|
template <class charT> class State
|
||
|
{
|
||
|
public:
|
||
|
State(StateType type, Automata<charT> &states) : type_(type), states_(states) {}
|
||
|
|
||
|
virtual ~State() = default;
|
||
|
|
||
|
virtual bool Check(const String<charT> &str, size_t pos) = 0;
|
||
|
|
||
|
virtual std::tuple<size_t, size_t> Next(const String<charT> &str, size_t pos) = 0;
|
||
|
|
||
|
StateType Type() const { return type_; }
|
||
|
|
||
|
Automata<charT> &GetAutomata() { return states_; }
|
||
|
|
||
|
void AddNextState(size_t state_pos) { next_states_.push_back(state_pos); }
|
||
|
|
||
|
const std::vector<size_t> &GetNextStates() const { return next_states_; }
|
||
|
|
||
|
const String<charT> &MatchedStr() { return matched_str_; }
|
||
|
|
||
|
virtual void ResetState() {}
|
||
|
|
||
|
protected:
|
||
|
void SetMatchedStr(const String<charT> &str) { matched_str_ = str; }
|
||
|
|
||
|
void SetMatchedStr(charT c) { matched_str_ = c; }
|
||
|
|
||
|
private:
|
||
|
StateType type_;
|
||
|
Automata<charT> &states_;
|
||
|
std::vector<size_t> next_states_;
|
||
|
String<charT> matched_str_;
|
||
|
};
|
||
|
|
||
|
template <class charT> class StateFail : public State<charT>
|
||
|
{
|
||
|
public:
|
||
|
StateFail(Automata<charT> &states) : State<charT>(StateType::FAIL, states) {}
|
||
|
|
||
|
bool Check(const String<charT> &, size_t) override { return false; }
|
||
|
|
||
|
std::tuple<size_t, size_t> Next(const String<charT> &, size_t pos) override
|
||
|
{
|
||
|
return std::tuple<size_t, size_t>(0, ++pos);
|
||
|
}
|
||
|
};
|
||
|
|
||
|
template <class charT> class StateMatch : public State<charT>
|
||
|
{
|
||
|
public:
|
||
|
StateMatch(Automata<charT> &states) : State<charT>(StateType::MATCH, states) {}
|
||
|
|
||
|
bool Check(const String<charT> &, size_t) override { return true; }
|
||
|
|
||
|
std::tuple<size_t, size_t> Next(const String<charT> &, size_t pos) override
|
||
|
{
|
||
|
return std::tuple<size_t, size_t>(0, ++pos);
|
||
|
}
|
||
|
};
|
||
|
|
||
|
template <class charT> class Automata
|
||
|
{
|
||
|
public:
|
||
|
Automata() = default;
|
||
|
|
||
|
Automata(const Automata<charT> &) = delete;
|
||
|
|
||
|
Automata<charT> &operator=(const Automata<charT> &automata) = delete;
|
||
|
|
||
|
Automata(Automata<charT> &&automata)
|
||
|
: states_{std::move(automata.states_)}, match_state_{automata.match_state_},
|
||
|
fail_state_{exchange(automata.fail_state_, 0)}, start_state_{
|
||
|
exchange(automata.start_state_, 0)}
|
||
|
{
|
||
|
}
|
||
|
|
||
|
Automata<charT> &operator=(Automata<charT> &&automata)
|
||
|
{
|
||
|
states_ = std::move(automata.states_);
|
||
|
match_state_ = automata.match_state_;
|
||
|
fail_state_ = automata.fail_state_;
|
||
|
start_state_ = automata.start_state_;
|
||
|
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
const State<charT> &GetState(size_t pos) const { return *states_[pos]; }
|
||
|
|
||
|
State<charT> &GetState(size_t pos) { return *states_[pos]; }
|
||
|
|
||
|
size_t FailState() const { return fail_state_; }
|
||
|
|
||
|
Automata<charT> &SetFailState(size_t state_pos)
|
||
|
{
|
||
|
fail_state_ = state_pos;
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
Automata<charT> &SetMatchState(size_t state_pos)
|
||
|
{
|
||
|
match_state_ = state_pos;
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
size_t GetNumStates() const { return states_.size(); }
|
||
|
|
||
|
std::tuple<bool, size_t> Exec(const String<charT> &str, bool comp_end = true)
|
||
|
{
|
||
|
auto r = ExecAux(str, comp_end);
|
||
|
ResetStates();
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
std::vector<String<charT>> GetMatchedStrings() const
|
||
|
{
|
||
|
std::vector<String<charT>> vec;
|
||
|
|
||
|
for (auto &state : states_) {
|
||
|
if (state->Type() == StateType::MULT || state->Type() == StateType::QUESTION ||
|
||
|
state->Type() == StateType::GROUP || state->Type() == StateType::SET) {
|
||
|
vec.push_back(state->MatchedStr());
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return vec;
|
||
|
}
|
||
|
|
||
|
template <class T, typename... Args> size_t NewState(Args &&...args)
|
||
|
{
|
||
|
size_t state_pos = states_.size();
|
||
|
auto state = std::unique_ptr<State<charT>>(new T(*this, std::forward<Args>(args)...));
|
||
|
|
||
|
states_.push_back(std::move(state));
|
||
|
return state_pos;
|
||
|
}
|
||
|
|
||
|
size_t fail_state_;
|
||
|
|
||
|
private:
|
||
|
std::tuple<bool, size_t> ExecAux(const String<charT> &str, bool comp_end = true) const
|
||
|
{
|
||
|
size_t state_pos = 0;
|
||
|
size_t str_pos = 0;
|
||
|
|
||
|
// run the state vector until state reaches fail or match state, or
|
||
|
// until the string is all consumed
|
||
|
while (state_pos != fail_state_ && state_pos != match_state_ && str_pos < str.length()) {
|
||
|
std::tie(state_pos, str_pos) = states_[state_pos]->Next(str, str_pos);
|
||
|
}
|
||
|
|
||
|
// if comp_end is true it matches only if the automata reached the end of
|
||
|
// the string
|
||
|
if (comp_end) {
|
||
|
if ((state_pos == match_state_) && (str_pos == str.length())) {
|
||
|
return std::tuple<bool, size_t>(state_pos == match_state_, str_pos);
|
||
|
}
|
||
|
|
||
|
return std::tuple<bool, size_t>(false, str_pos);
|
||
|
}
|
||
|
else {
|
||
|
// if comp_end is false, compare only if the states reached the
|
||
|
// match state
|
||
|
return std::tuple<bool, size_t>(state_pos == match_state_, str_pos);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ResetStates()
|
||
|
{
|
||
|
for (auto &state : states_) {
|
||
|
state->ResetState();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
std::vector<std::unique_ptr<State<charT>>> states_;
|
||
|
size_t match_state_;
|
||
|
|
||
|
size_t start_state_;
|
||
|
};
|
||
|
|
||
|
// State that accepts exactly one given character.
template <class charT> class StateChar : public State<charT>
{
    using Base = State<charT>;
    using Base::GetAutomata;
    using Base::GetNextStates;

public:
    StateChar(Automata<charT> &states, charT c) : Base(StateType::CHAR, states), c_{c} {}

    bool Check(const String<charT> &str, size_t pos) override { return str[pos] == c_; }

    // One character is consumed in either case. On a match the state remembers
    // the character and moves to its first next state, otherwise to the fail
    // state of the owning automata.
    std::tuple<size_t, size_t> Next(const String<charT> &str, size_t pos) override
    {
        const size_t following_pos = pos + 1;

        if (str[pos] != c_) {
            return std::tuple<size_t, size_t>(GetAutomata().FailState(), following_pos);
        }

        this->SetMatchedStr(c_);
        return std::tuple<size_t, size_t>(GetNextStates()[0], following_pos);
    }

private:
    charT c_;
};
|
||
|
|
||
|
// State for '?': accepts any single character.
template <class charT> class StateAny : public State<charT>
{
    using Base = State<charT>;
    using Base::GetAutomata;
    using Base::GetNextStates;

public:
    StateAny(Automata<charT> &states) : Base(StateType::QUESTION, states) {}

    // Every character is accepted, so the answer is always true.
    bool Check(const String<charT> &, size_t) override { return true; }

    // Remembers the character at pos, consumes it and moves to the first next
    // state.
    std::tuple<size_t, size_t> Next(const String<charT> &str, size_t pos) override
    {
        const charT current = str[pos];
        this->SetMatchedStr(current);
        return std::tuple<size_t, size_t>(GetNextStates()[0], pos + 1);
    }
};
|
||
|
|
||
|
// State for '*': consumes characters until the state that follows accepts.
//
// The next-state vector holds two entries: entry 0 leads back to this state,
// entry 1 is the state that follows the star.
template <class charT> class StateStar : public State<charT>
{
    using Base = State<charT>;
    using Base::GetAutomata;
    using Base::GetNextStates;

public:
    StateStar(Automata<charT> &states) : Base(StateType::MULT, states) {}

    // A star accepts any character, so the answer is always true.
    bool Check(const String<charT> &, size_t) override { return true; }

    std::tuple<size_t, size_t> Next(const String<charT> &str, size_t pos) override
    {
        const size_t following_index = GetNextStates()[1];
        State<charT> &following = GetAutomata().GetState(following_index);

        // Star at the very end of the glob: the rest of the string belongs to
        // the star and the whole input is consumed at once.
        if (following.Type() == StateType::MATCH) {
            this->SetMatchedStr(str.substr(pos));
            return std::tuple<size_t, size_t>(following_index, str.length());
        }

        // The following state accepts the current position: hand over without
        // consuming anything.
        if (following.Check(str, pos)) {
            return std::tuple<size_t, size_t>(following_index, pos);
        }

        // Otherwise the star swallows the current character and stays active.
        this->SetMatchedStr(this->MatchedStr() + str[pos]);
        return std::tuple<size_t, size_t>(GetNextStates()[0], pos + 1);
    }
};
|
||
|
|
||
|
// Interface of one element of a character set.
template <class charT> class SetItem
{
public:
    SetItem() = default;

    virtual ~SetItem() = default;

    // Returns true when the character c is covered by this item.
    virtual bool Check(charT c) const = 0;
};
|
||
|
|
||
|
// Set element that covers exactly one character.
template <class charT> class SetItemChar : public SetItem<charT>
{
public:
    SetItemChar(charT c) : c_{c} {}

    // True only for the character given at construction.
    bool Check(charT c) const override { return c_ == c; }

private:
    charT c_;
};
|
||
|
|
||
|
// Set element that covers an inclusive range of characters.
template <class charT> class SetItemRange : public SetItem<charT>
{
public:
    // The bounds may be given in either order; they are stored sorted.
    SetItemRange(charT start, charT end) : start_{start}, end_{end}
    {
        if (end_ < start_) {
            const charT lower = end_;
            end_ = start_;
            start_ = lower;
        }
    }

    // True when c lies between the two bounds, bounds included.
    bool Check(charT c) const override
    {
        if (c < start_) {
            return false;
        }
        return c <= end_;
    }

private:
    charT start_;
    charT end_;
};
|
||
|
|
||
|
// State for a character set, optionally negated.
template <class charT> class StateSet : public State<charT>
{
    using Base = State<charT>;
    using Base::GetAutomata;
    using Base::GetNextStates;

public:
    // items: the elements of the set; neg: true for a negated set.
    StateSet(Automata<charT> &states, std::vector<std::unique_ptr<SetItem<charT>>> items,
             bool neg = false)
        : Base(StateType::SET, states), items_{std::move(items)}, neg_{neg}
    {
    }

    // True when at least one item of the set covers str[pos]; negation is not
    // taken into account here.
    bool SetCheck(const String<charT> &str, size_t pos) const
    {
        for (const auto &item : items_) {
            if (item->Check(str[pos])) {
                return true;
            }
        }

        return false;
    }

    // Set membership of str[pos], inverted for a negated set.
    bool Check(const String<charT> &str, size_t pos) override
    {
        const bool in_set = SetCheck(str, pos);
        return neg_ ? !in_set : in_set;
    }

    // One character is consumed in either case. On success the state remembers
    // the character and moves to its first next state, otherwise to the fail
    // state of the owning automata.
    std::tuple<size_t, size_t> Next(const String<charT> &str, size_t pos) override
    {
        if (!Check(str, pos)) {
            return std::tuple<size_t, size_t>(GetAutomata().FailState(), pos + 1);
        }

        this->SetMatchedStr(str[pos]);
        return std::tuple<size_t, size_t>(GetNextStates()[0], pos + 1);
    }

private:
    std::vector<std::unique_ptr<SetItem<charT>>> items_;
    bool neg_;
};
|
||
|
|
||
|
// State for a group of alternatives, each alternative being its own automata.
//
// Next-state convention used by this class: entry 0 leads back to this state
// (for repetition), entry 1 is the state that follows the group.
template <class charT> class StateGroup : public State<charT>
{
    using State<charT>::GetNextStates;
    using State<charT>::GetAutomata;

public:
    enum class Type { BASIC, ANY, STAR, PLUS, NEG, AT };

    // type: repetition/negation behaviour of the group;
    // automatas: one automata per alternative, ownership is taken over.
    StateGroup(Automata<charT> &states, Type type,
               std::vector<std::unique_ptr<Automata<charT>>> &&automatas)
        : State<charT>(StateType::GROUP, states), type_{type}, automatas_{std::move(automatas)},
          match_one_{false}
    {
    }

    void ResetState() override { match_one_ = false; }

    // Tries every alternative on the part of str that starts at pos and stops
    // at the first one that matches.
    // Returns (matched, position in str reached by the last alternative tried).
    std::tuple<bool, size_t> BasicCheck(const String<charT> &str, size_t pos)
    {
        const String<charT> str_part = str.substr(pos);
        size_t str_pos = 0;

        for (auto &automata : automatas_) {
            bool r = false;
            // an alternative does not have to consume the whole remainder
            std::tie(r, str_pos) = automata->Exec(str_part, false);
            if (r) {
                return std::tuple<bool, size_t>(true, pos + str_pos);
            }
        }

        return std::tuple<bool, size_t>(false, pos + str_pos);
    }

    // True when any alternative matches at pos. Every group type answers the
    // same way, so no dispatch on type_ is needed.
    bool Check(const String<charT> &str, size_t pos) override
    {
        return std::get<0>(BasicCheck(str, pos));
    }

    // Dispatches to the transition function of the group type.
    std::tuple<size_t, size_t> Next(const String<charT> &str, size_t pos) override
    {
        switch (type_) {
        case Type::BASIC:
        case Type::AT:
            return NextBasic(str, pos);

        case Type::ANY:
            return NextAny(str, pos);

        case Type::STAR:
            return NextStar(str, pos);

        case Type::PLUS:
            return NextPlus(str, pos);

        case Type::NEG:
            return NextNeg(str, pos);
        }

        // only reachable with a type_ value outside the enumeration
        return std::tuple<size_t, size_t>(0, 0);
    }

    // Negated group: a matching alternative leads to the fail state, otherwise
    // the following state is entered without consuming input.
    std::tuple<size_t, size_t> NextNeg(const String<charT> &str, size_t pos)
    {
        bool r = false;
        size_t new_pos = pos;
        std::tie(r, new_pos) = BasicCheck(str, pos);
        if (r) {
            AppendMatched(str, pos, new_pos);
            return std::tuple<size_t, size_t>(GetAutomata().FailState(), new_pos);
        }

        return std::tuple<size_t, size_t>(GetNextStates()[1], pos);
    }

    // Exactly one occurrence: a matching alternative leads to the following
    // state, anything else to the fail state.
    std::tuple<size_t, size_t> NextBasic(const String<charT> &str, size_t pos)
    {
        bool r = false;
        size_t new_pos = pos;
        std::tie(r, new_pos) = BasicCheck(str, pos);
        if (r) {
            AppendMatched(str, pos, new_pos);
            return std::tuple<size_t, size_t>(GetNextStates()[1], new_pos);
        }

        return std::tuple<size_t, size_t>(GetAutomata().FailState(), new_pos);
    }

    // Zero or one occurrence: the following state is entered either way, input
    // is consumed only when an alternative matched.
    std::tuple<size_t, size_t> NextAny(const String<charT> &str, size_t pos)
    {
        bool r = false;
        size_t new_pos = pos;
        std::tie(r, new_pos) = BasicCheck(str, pos);
        if (r) {
            AppendMatched(str, pos, new_pos);
            return std::tuple<size_t, size_t>(GetNextStates()[1], new_pos);
        }

        return std::tuple<size_t, size_t>(GetNextStates()[1], pos);
    }

    // Zero or more occurrences.
    std::tuple<size_t, size_t> NextStar(const String<charT> &str, size_t pos)
    {
        bool r = false;
        size_t new_pos = pos;
        std::tie(r, new_pos) = BasicCheck(str, pos);
        if (r) {
            AppendMatched(str, pos, new_pos);
            return AfterRepetition(str, pos, new_pos);
        }

        return std::tuple<size_t, size_t>(GetNextStates()[1], pos);
    }

    // One or more occurrences.
    std::tuple<size_t, size_t> NextPlus(const String<charT> &str, size_t pos)
    {
        bool r = false;
        size_t new_pos = pos;
        std::tie(r, new_pos) = BasicCheck(str, pos);
        if (r) {
            match_one_ = true;
            AppendMatched(str, pos, new_pos);
            return AfterRepetition(str, pos, new_pos);
        }

        // No further occurrence. If the group matched at least once before,
        // continue with the following state, otherwise the group failed.
        // (The former extra probe of the following state never influenced the
        // outcome: both of its branches returned the same transition.)
        if (match_one_) {
            return std::tuple<size_t, size_t>(GetNextStates()[1], pos);
        }

        return std::tuple<size_t, size_t>(GetAutomata().FailState(), new_pos);
    }

private:
    // Appends str[from, to) to the text matched by this group so far.
    void AppendMatched(const String<charT> &str, size_t from, size_t to)
    {
        this->SetMatchedStr(this->MatchedStr() + str.substr(from, to - from));
    }

    // Transition taken by STAR and PLUS after one successful occurrence that
    // moved the position from pos to new_pos.
    std::tuple<size_t, size_t> AfterRepetition(const String<charT> &str, size_t pos,
                                               size_t new_pos)
    {
        const size_t following = GetNextStates()[1];

        // if the string reached its end and the following state is the match
        // state, go there directly to avoid getting stuck in this state
        const bool finished = GetAutomata().GetState(following).Type() == StateType::MATCH &&
                              new_pos == str.length();

        // An occurrence that consumed nothing would be repeated forever at the
        // same position, so it also ends the repetition.
        if (finished || new_pos == pos) {
            return std::tuple<size_t, size_t>(following, new_pos);
        }

        return std::tuple<size_t, size_t>(GetNextStates()[0], new_pos);
    }

    Type type_{};
    std::vector<std::unique_ptr<Automata<charT>>> automatas_;
    // set once a PLUS group matched at least one occurrence; cleared by ResetState()
    bool match_one_;
};
|
||
|
|
||
|
// Kinds of token produced by the lexer. The numeric values are used as indices
// into token_name_str, so both lists have to stay in the same order.
enum class TokenKind {
    UNKNOWN = 0,
    CHAR,
    EOS,         // end of source
    SUB,         // "-"
    STAR,        // "*"
    QUESTION,    // "?"
    LPAREN,      // "("
    QUESTLPAREN, // "?("
    STARLPAREN,  // "*("
    PLUSLPAREN,  // "+("
    NEGLPAREN,   // "!("
    ATLPAREN,    // "@("
    RPAREN,      // ")"
    UNION,       // "|"
    LBRACKET,    // "["
    RBRACKET,    // "]"
    NEGLBRACKET, // negated set opener
    NUM_TOKENS
};
|
||
|
|
||
|
// Printable name of every token kind, indexed by the numeric value of the
// kind. The last, empty entry is the slot of NUM_TOKENS.
static const char *token_name_str[] = {
    "UNKNOWN",
    "CHAR",
    "EOS",
    "SUB",
    "STAR",
    "QUESTION",
    "LPAREN",
    "QUESTLPAREN",
    "STARLPAREN",
    "PLUSLPAREN",
    "NEGLPAREN",
    "ATLPAREN",
    "RPAREN",
    "UNION",
    "LBRACKET",
    "RBRACKET",
    "NEGLBRACKET",
    ""};
|
||
|
|
||
|
template <class charT> class Token
|
||
|
{
|
||
|
public:
|
||
|
Token(TokenKind kind) : kind_{kind} {}
|
||
|
Token(TokenKind kind, charT value) : kind_{kind}, value_{value} {}
|
||
|
TokenKind Kind() const { return kind_; }
|
||
|
|
||
|
charT Value() const { return value_; }
|
||
|
|
||
|
bool operator==(TokenKind kind) { return kind_ == kind; }
|
||
|
|
||
|
bool operator==(TokenKind kind) const { return kind_ == kind; }
|
||
|
|
||
|
bool operator!=(TokenKind kind) { return kind_ != kind; }
|
||
|
|
||
|
bool operator!=(TokenKind kind) const { return kind_ != kind; }
|
||
|
|
||
|
private:
|
||
|
template <class charU>
|
||
|
friend std::ostream &operator<<(std::ostream &stream, const Token<charU> &token);
|
||
|
|
||
|
TokenKind kind_;
|
||
|
charT value_{};
|
||
|
};
|
||
|
|
||
|
template <class charT>
|
||
|
inline std::ostream &operator<<(std::ostream &stream, const Token<charT> &token)
|
||
|
{
|
||
|
stream << '[' << token_name_str[static_cast<int>(token.kind_)] << ']';
|
||
|
return stream;
|
||
|
}
|
||
|
|
||
|
template <class charT> class Lexer
|
||
|
{
|
||
|
public:
|
||
|
static const char kEndOfInput = -1;
|
||
|
|
||
|
Lexer(const String<charT> &str) : str_(str), pos_{0}, c_{str[0]} {}
|
||
|
|
||
|
std::vector<Token<charT>> Scanner()
|
||
|
{
|
||
|
std::vector<Token<charT>> tokens;
|
||
|
while (true) {
|
||
|
switch (c_) {
|
||
|
case '?': {
|
||
|
Advance();
|
||
|
if (c_ == '(') {
|
||
|
tokens.push_back(Select(TokenKind::QUESTLPAREN));
|
||
|
Advance();
|
||
|
}
|
||
|
else {
|
||
|
tokens.push_back(Select(TokenKind::QUESTION));
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case '*': {
|
||
|
Advance();
|
||
|
if (c_ == '(') {
|
||
|
tokens.push_back(Select(TokenKind::STARLPAREN));
|
||
|
Advance();
|
||
|
}
|
||
|
else {
|
||
|
tokens.push_back(Select(TokenKind::STAR));
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case '+': {
|
||
|
Advance();
|
||
|
if (c_ == '(') {
|
||
|
tokens.push_back(Select(TokenKind::PLUSLPAREN));
|
||
|
Advance();
|
||
|
}
|
||
|
else {
|
||
|
tokens.push_back(Select(TokenKind::CHAR, '+'));
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case '-': {
|
||
|
tokens.push_back(Select(TokenKind::SUB));
|
||
|
Advance();
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case '|': {
|
||
|
tokens.push_back(Select(TokenKind::UNION));
|
||
|
Advance();
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case '@': {
|
||
|
Advance();
|
||
|
if (c_ == '(') {
|
||
|
tokens.push_back(Select(TokenKind::ATLPAREN));
|
||
|
Advance();
|
||
|
}
|
||
|
else {
|
||
|
tokens.push_back(Select(TokenKind::CHAR, '@'));
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case '!': {
|
||
|
Advance();
|
||
|
if (c_ == '(') {
|
||
|
tokens.push_back(Select(TokenKind::NEGLPAREN));
|
||
|
Advance();
|
||
|
}
|
||
|
else {
|
||
|
tokens.push_back(Select(TokenKind::CHAR, '!'));
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case '(': {
|
||
|
tokens.push_back(Select(TokenKind::LPAREN));
|
||
|
Advance();
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case ')': {
|
||
|
tokens.push_back(Select(TokenKind::RPAREN));
|
||
|
Advance();
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case '[': {
|
||
|
Advance();
|
||
|
if (c_ == '!') {
|
||
|
tokens.push_back(Select(TokenKind::NEGLBRACKET));
|
||
|
Advance();
|
||
|
}
|
||
|
else {
|
||
|
tokens.push_back(Select(TokenKind::LBRACKET));
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case ']': {
|
||
|
tokens.push_back(Select(TokenKind::RBRACKET));
|
||
|
Advance();
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case '\\': {
|
||
|
Advance();
|
||
|
if (c_ == kEndOfInput) {
|
||
|
throw Error("No valid char after '\\'");
|
||
|
}
|
||
|
else if (IsSpecialChar(c_)) {
|
||
|
tokens.push_back(Select(TokenKind::CHAR, c_));
|
||
|
Advance();
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
default: {
|
||
|
if (c_ == kEndOfInput) {
|
||
|
tokens.push_back(Select(TokenKind::EOS));
|
||
|
return tokens;
|
||
|
}
|
||
|
else {
|
||
|
tokens.push_back(Select(TokenKind::CHAR, c_));
|
||
|
Advance();
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
inline Token<charT> Select(TokenKind k) { return Token<charT>(k); }
|
||
|
|
||
|
inline Token<charT> Select(TokenKind k, charT value) { return Token<charT>(k, value); }
|
||
|
|
||
|
void Advance()
|
||
|
{
|
||
|
if (pos_ == (str_.length() - 1)) {
|
||
|
c_ = kEndOfInput;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
c_ = str_[++pos_];
|
||
|
}
|
||
|
|
||
|
inline bool IsSpecialChar(charT c)
|
||
|
{
|
||
|
bool b = c == '?' || c == '*' || c == '+' || c == '(' || c == ')' || c == '[' || c == ']' ||
|
||
|
c == '|' || c == '!' || c == '@' || c == '\\';
|
||
|
return b;
|
||
|
}
|
||
|
|
||
|
String<charT> str_;
|
||
|
size_t pos_;
|
||
|
charT c_;
|
||
|
};
|
||
|
|
||
|
// X-macro listing every concrete AST node class. APPLY is expanded once per
// class name.
#define GLOB_AST_NODE_LIST(APPLY) \
    APPLY(CharNode)               \
    APPLY(RangeNode)              \
    APPLY(SetItemsNode)           \
    APPLY(PositiveSetNode)        \
    APPLY(NegativeSetNode)        \
    APPLY(StarNode)               \
    APPLY(AnyNode)                \
    APPLY(GroupNode)              \
    APPLY(ConcatNode)             \
    APPLY(UnionNode)              \
    APPLY(GlobNode)

template <class charT> class AstVisitor;

// forward declare every node class so that the visitor can name them
#define DECLARE_TYPE_CLASS(node_class) template <class charT> class node_class;
GLOB_AST_NODE_LIST(DECLARE_TYPE_CLASS)
#undef DECLARE_TYPE_CLASS
|
||
|
|
||
|
template <class charT> class AstNode
|
||
|
{
|
||
|
public:
|
||
|
enum class Type {
|
||
|
CHAR,
|
||
|
RANGE,
|
||
|
SET_ITEM,
|
||
|
SET_ITEMS,
|
||
|
POS_SET,
|
||
|
NEG_SET,
|
||
|
SET,
|
||
|
STAR,
|
||
|
ANY,
|
||
|
GROUP,
|
||
|
CONCAT_GLOB,
|
||
|
UNION,
|
||
|
GLOB
|
||
|
};
|
||
|
|
||
|
virtual ~AstNode() = default;
|
||
|
|
||
|
Type GetType() const { return type_; }
|
||
|
|
||
|
virtual void Accept(AstVisitor<charT> *visitor) = 0;
|
||
|
|
||
|
protected:
|
||
|
AstNode(Type type) : type_{type} {}
|
||
|
|
||
|
private:
|
||
|
Type type_;
|
||
|
};
|
||
|
|
||
|
// Owning pointer to a node of the syntax tree.
template <class charT> using AstNodePtr = std::unique_ptr<AstNode<charT>>;
|
||
|
|
||
|
template <class charT> class AstVisitor
|
||
|
{
|
||
|
public:
|
||
|
// define all visitor methods for the nodes
|
||
|
#define DECLARE_VIRTUAL_FUNC(type) \
|
||
|
virtual void Visit##type(type<charT> * /*node*/) {}
|
||
|
GLOB_AST_NODE_LIST(DECLARE_VIRTUAL_FUNC)
|
||
|
#undef DECLARE_VIRTUAL_FUNC
|
||
|
};
|
||
|
|
||
|
// Syntax tree node for one literal character.
template <class charT> class CharNode : public AstNode<charT>
{
public:
    CharNode(charT c) : AstNode<charT>(AstNode<charT>::Type::CHAR), c_{c} {}

    void Accept(AstVisitor<charT> *visitor) override { visitor->VisitCharNode(this); }

    // Returns the character as charT. The former return type `char` truncated
    // the value for character types wider than char.
    charT GetValue() const { return c_; }

private:
    charT c_;
};
|
||
|
|
||
|
// Syntax tree node for a character range inside a set; owns the nodes of both
// bounds.
template <class charT> class RangeNode : public AstNode<charT>
{
    using Base = AstNode<charT>;

public:
    RangeNode(AstNodePtr<charT> &&start, AstNodePtr<charT> &&end)
        : Base(Base::Type::RANGE), start_{std::move(start)}, end_{std::move(end)}
    {
    }

    void Accept(AstVisitor<charT> *visitor) override { visitor->VisitRangeNode(this); }

    // Non-owning access to the two bounds.
    AstNode<charT> *GetStart() const { return start_.get(); }

    AstNode<charT> *GetEnd() const { return end_.get(); }

private:
    AstNodePtr<charT> start_;
    AstNodePtr<charT> end_;
};
|
||
|
|
||
|
// Syntax tree node holding the items of one set.
template <class charT> class SetItemsNode : public AstNode<charT>
{
    using Base = AstNode<charT>;

public:
    SetItemsNode(std::vector<AstNodePtr<charT>> &&items)
        : Base(Base::Type::SET_ITEMS), items_{std::move(items)}
    {
    }

    void Accept(AstVisitor<charT> *visitor) override { visitor->VisitSetItemsNode(this); }

    // Mutable access to the owned item nodes.
    std::vector<AstNodePtr<charT>> &GetItems() { return items_; }

private:
    std::vector<AstNodePtr<charT>> items_;
};
|
||
|
|
||
|
// Syntax tree node for a set that is not negated; owns the node with the set
// items.
template <class charT> class PositiveSetNode : public AstNode<charT>
{
    using Base = AstNode<charT>;

public:
    PositiveSetNode(AstNodePtr<charT> &&set) : Base(Base::Type::POS_SET), set_{std::move(set)} {}

    void Accept(AstVisitor<charT> *visitor) override { visitor->VisitPositiveSetNode(this); }

    // Non-owning access to the wrapped node.
    AstNode<charT> *GetSet() { return set_.get(); }

private:
    AstNodePtr<charT> set_;
};
|
||
|
|
||
|
// Syntax tree node for a negated set; owns the node with the set items.
template <class charT> class NegativeSetNode : public AstNode<charT>
{
    using Base = AstNode<charT>;

public:
    NegativeSetNode(AstNodePtr<charT> &&set) : Base(Base::Type::NEG_SET), set_{std::move(set)} {}

    void Accept(AstVisitor<charT> *visitor) override { visitor->VisitNegativeSetNode(this); }

    // Non-owning access to the wrapped node.
    AstNode<charT> *GetSet() { return set_.get(); }

private:
    AstNodePtr<charT> set_;
};
|
||
|
|
||
|
// Syntax tree node of type STAR; it carries no data of its own.
template <class charT> class StarNode : public AstNode<charT>
{
    using Base = AstNode<charT>;

public:
    StarNode() : Base(Base::Type::STAR) {}

    void Accept(AstVisitor<charT> *visitor) override { visitor->VisitStarNode(this); }
};
|
||
|
|
||
|
// Syntax tree node of type ANY; it carries no data of its own.
template <class charT> class AnyNode : public AstNode<charT>
{
    using Base = AstNode<charT>;

public:
    AnyNode() : Base(Base::Type::ANY) {}

    void Accept(AstVisitor<charT> *visitor) override { visitor->VisitAnyNode(this); }
};
|
||
|
|
||
|
// Syntax tree node for a group; owns the node of the group content and
// remembers the kind of group.
template <class charT> class GroupNode : public AstNode<charT>
{
    using Base = AstNode<charT>;

public:
    enum class GroupType { BASIC, ANY, STAR, PLUS, NEG, AT };

    GroupNode(GroupType group_type, AstNodePtr<charT> &&glob)
        : Base(Base::Type::GROUP), glob_{std::move(glob)}, group_type_{group_type}
    {
    }

    void Accept(AstVisitor<charT> *visitor) override { visitor->VisitGroupNode(this); }

    // Non-owning access to the content of the group.
    AstNode<charT> *GetGlob() { return glob_.get(); }

    GroupType GetGroupType() const { return group_type_; }

private:
    AstNodePtr<charT> glob_;
    GroupType group_type_;
};
|
||
|
|
||
|
// Syntax tree node for a sequence of basic globs; owns its parts.
template <class charT> class ConcatNode : public AstNode<charT>
{
    using Base = AstNode<charT>;

public:
    ConcatNode(std::vector<AstNodePtr<charT>> &&basic_glob)
        : Base(Base::Type::CONCAT_GLOB), basic_glob_{std::move(basic_glob)}
    {
    }

    void Accept(AstVisitor<charT> *visitor) override { visitor->VisitConcatNode(this); }

    // Mutable access to the owned parts.
    std::vector<AstNodePtr<charT>> &GetBasicGlobs() { return basic_glob_; }

private:
    std::vector<AstNodePtr<charT>> basic_glob_;
};
|
||
|
|
||
|
// Syntax tree node for a list of alternatives; owns them.
template <class charT> class UnionNode : public AstNode<charT>
{
    using Base = AstNode<charT>;

public:
    UnionNode(std::vector<AstNodePtr<charT>> &&items)
        : Base(Base::Type::UNION), items_{std::move(items)}
    {
    }

    void Accept(AstVisitor<charT> *visitor) override { visitor->VisitUnionNode(this); }

    // Mutable access to the owned alternatives.
    std::vector<AstNodePtr<charT>> &GetItems() { return items_; }

private:
    std::vector<AstNodePtr<charT>> items_;
};
|
||
|
|
||
|
// Syntax tree node of type GLOB; owns the node it wraps.
template <class charT> class GlobNode : public AstNode<charT>
{
    using Base = AstNode<charT>;

public:
    GlobNode(AstNodePtr<charT> &&glob) : Base(Base::Type::GLOB), glob_{std::move(glob)} {}

    void Accept(AstVisitor<charT> *visitor) override { visitor->VisitGlobNode(this); }

    // Non-owning access to the wrapped node.
    AstNode<charT> *GetConcat() { return glob_.get(); }

private:
    AstNodePtr<charT> glob_;
};
|
||
|
|
||
|
template <class charT> class Parser
|
||
|
{
|
||
|
public:
|
||
|
Parser() = delete;
|
||
|
|
||
|
Parser(std::vector<Token<charT>> &&tok_vec) : tok_vec_{std::move(tok_vec)}, pos_{0} {}
|
||
|
|
||
|
AstNodePtr<charT> GenAst() { return ParserGlob(); }
|
||
|
|
||
|
private:
|
||
|
AstNodePtr<charT> ParserChar()
|
||
|
{
|
||
|
Token<charT> &tk = NextToken();
|
||
|
if (tk != TokenKind::CHAR) {
|
||
|
throw Error("char expected");
|
||
|
}
|
||
|
|
||
|
charT c = tk.Value();
|
||
|
return AstNodePtr<charT>(new CharNode<charT>(c));
|
||
|
}
|
||
|
|
||
|
AstNodePtr<charT> ParserRange()
|
||
|
{
|
||
|
AstNodePtr<charT> char_start = ParserChar();
|
||
|
|
||
|
Token<charT> &tk = NextToken();
|
||
|
if (tk != TokenKind::SUB) {
|
||
|
throw Error("range expected");
|
||
|
}
|
||
|
|
||
|
AstNodePtr<charT> char_end = ParserChar();
|
||
|
return AstNodePtr<charT>(new RangeNode<charT>(std::move(char_start), std::move(char_end)));
|
||
|
}
|
||
|
|
||
|
AstNodePtr<charT> ParserSetItem()
|
||
|
{
|
||
|
if (PeekAhead() == TokenKind::SUB) {
|
||
|
return ParserRange();
|
||
|
}
|
||
|
|
||
|
return ParserChar();
|
||
|
}
|
||
|
|
||
|
AstNodePtr<charT> ParserSetItems()
|
||
|
{
|
||
|
std::vector<AstNodePtr<charT>> items;
|
||
|
|
||
|
do {
|
||
|
items.push_back(ParserSetItem());
|
||
|
} while (GetToken() != TokenKind::RBRACKET);
|
||
|
|
||
|
Advance();
|
||
|
|
||
|
return AstNodePtr<charT>(new SetItemsNode<charT>(std::move(items)));
|
||
|
}
|
||
|
|
||
|
AstNodePtr<charT> ParserSet()
|
||
|
{
|
||
|
Token<charT> &tk = NextToken();
|
||
|
|
||
|
if (tk == TokenKind::LBRACKET) {
|
||
|
return AstNodePtr<charT>(new PositiveSetNode<charT>(ParserSetItems()));
|
||
|
}
|
||
|
else if (tk == TokenKind::NEGLBRACKET) {
|
||
|
return AstNodePtr<charT>(new NegativeSetNode<charT>(ParserSetItems()));
|
||
|
}
|
||
|
else {
|
||
|
throw Error("set expected");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
AstNodePtr<charT> ParserBasicGlob()
|
||
|
{
|
||
|
Token<charT> &tk = GetToken();
|
||
|
|
||
|
switch (tk.Kind()) {
|
||
|
case TokenKind::QUESTION: Advance(); return AstNodePtr<charT>(new AnyNode<charT>());
|
||
|
|
||
|
case TokenKind::STAR: Advance(); return AstNodePtr<charT>(new StarNode<charT>());
|
||
|
|
||
|
case TokenKind::SUB: Advance(); return AstNodePtr<charT>(new CharNode<charT>('-'));
|
||
|
|
||
|
case TokenKind::CHAR: return ParserChar();
|
||
|
|
||
|
case TokenKind::LBRACKET:
|
||
|
case TokenKind::NEGLBRACKET: return ParserSet();
|
||
|
|
||
|
case TokenKind::LPAREN:
|
||
|
case TokenKind::QUESTLPAREN:
|
||
|
case TokenKind::STARLPAREN:
|
||
|
case TokenKind::PLUSLPAREN:
|
||
|
case TokenKind::NEGLPAREN:
|
||
|
case TokenKind::ATLPAREN: return ParserGroup();
|
||
|
|
||
|
default: throw Error("basic glob expected");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
AstNodePtr<charT> ParserGroup()
|
||
|
{
|
||
|
typename GroupNode<charT>::GroupType type;
|
||
|
Token<charT> &tk = NextToken();
|
||
|
|
||
|
switch (tk.Kind()) {
|
||
|
case TokenKind::LPAREN: type = GroupNode<charT>::GroupType::BASIC; break;
|
||
|
|
||
|
case TokenKind::QUESTLPAREN: type = GroupNode<charT>::GroupType::ANY; break;
|
||
|
|
||
|
case TokenKind::STARLPAREN: type = GroupNode<charT>::GroupType::STAR; break;
|
||
|
|
||
|
case TokenKind::PLUSLPAREN: type = GroupNode<charT>::GroupType::PLUS; break;
|
||
|
|
||
|
case TokenKind::NEGLPAREN: type = GroupNode<charT>::GroupType::NEG; break;
|
||
|
|
||
|
case TokenKind::ATLPAREN: type = GroupNode<charT>::GroupType::AT; break;
|
||
|
|
||
|
default: throw Error("Not valid group"); break;
|
||
|
}
|
||
|
|
||
|
AstNodePtr<charT> group_glob = ParserUnion();
|
||
|
tk = NextToken();
|
||
|
if (tk != TokenKind::RPAREN) {
|
||
|
throw Error("Expected ')' at and of group");
|
||
|
}
|
||
|
|
||
|
return AstNodePtr<charT>(new GroupNode<charT>(type, std::move(group_glob)));
|
||
|
}
|
||
|
|
||
|
AstNodePtr<charT> ParserConcat()
|
||
|
{
|
||
|
auto check_end = [&]() -> bool {
|
||
|
Token<charT> &tk = GetToken();
|
||
|
|
||
|
switch (tk.Kind()) {
|
||
|
case TokenKind::EOS:
|
||
|
case TokenKind::RPAREN:
|
||
|
case TokenKind::UNION: return true;
|
||
|
|
||
|
default: return false;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
std::vector<AstNodePtr<charT>> parts;
|
||
|
|
||
|
while (!check_end()) {
|
||
|
parts.push_back(ParserBasicGlob());
|
||
|
}
|
||
|
|
||
|
return AstNodePtr<charT>(new ConcatNode<charT>(std::move(parts)));
|
||
|
}
|
||
|
|
||
|
AstNodePtr<charT> ParserUnion()
|
||
|
{
|
||
|
std::vector<AstNodePtr<charT>> items;
|
||
|
items.push_back(ParserConcat());
|
||
|
|
||
|
while (GetToken() == TokenKind::UNION) {
|
||
|
Advance();
|
||
|
items.push_back(ParserConcat());
|
||
|
}
|
||
|
|
||
|
return AstNodePtr<charT>(new UnionNode<charT>(std::move(items)));
|
||
|
}
|
||
|
|
||
|
AstNodePtr<charT> ParserGlob()
|
||
|
{
|
||
|
AstNodePtr<charT> glob = ParserConcat();
|
||
|
|
||
|
if (GetToken() != TokenKind::EOS) {
|
||
|
throw Error("Expected the end of glob");
|
||
|
}
|
||
|
|
||
|
return AstNodePtr<charT>(new GlobNode<charT>(std::move(glob)));
|
||
|
}
|
||
|
|
||
|
inline const Token<charT> &GetToken() const { return tok_vec_.at(pos_); }
|
||
|
|
||
|
inline Token<charT> &GetToken() { return tok_vec_.at(pos_); }
|
||
|
|
||
|
inline const Token<charT> &PeekAhead() const
|
||
|
{
|
||
|
if (pos_ >= (tok_vec_.size() - 1))
|
||
|
return tok_vec_.back();
|
||
|
|
||
|
return tok_vec_.at(pos_ + 1);
|
||
|
}
|
||
|
|
||
|
inline Token<charT> &NextToken()
|
||
|
{
|
||
|
if (pos_ >= (tok_vec_.size() - 1))
|
||
|
return tok_vec_.back();
|
||
|
|
||
|
Token<charT> &tk = tok_vec_.at(pos_);
|
||
|
pos_++;
|
||
|
return tk;
|
||
|
}
|
||
|
|
||
|
inline bool Advance()
|
||
|
{
|
||
|
if (pos_ == tok_vec_.size() - 1)
|
||
|
return false;
|
||
|
|
||
|
++pos_;
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
inline size_t Size() const noexcept { return tok_vec_.size(); }
|
||
|
|
||
|
std::vector<Token<charT>> tok_vec_;
|
||
|
size_t pos_;
|
||
|
};
|
||
|
|
||
|
template <class charT> class AstConsumer
|
||
|
{
|
||
|
public:
|
||
|
AstConsumer() = default;
|
||
|
|
||
|
void GenAutomata(AstNode<charT> *root_node, Automata<charT> &automata)
|
||
|
{
|
||
|
AstNode<charT> *concat_node = static_cast<GlobNode<charT> *>(root_node)->GetConcat();
|
||
|
ExecConcat(concat_node, automata);
|
||
|
|
||
|
size_t match_state = automata.template NewState<StateMatch<charT>>();
|
||
|
automata.GetState(preview_state_).AddNextState(match_state);
|
||
|
automata.SetMatchState(match_state);
|
||
|
|
||
|
size_t fail_state = automata.template NewState<StateFail<charT>>();
|
||
|
automata.SetFailState(fail_state);
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
void ExecConcat(AstNode<charT> *node, Automata<charT> &automata)
|
||
|
{
|
||
|
ConcatNode<charT> *concat_node = static_cast<ConcatNode<charT> *>(node);
|
||
|
std::vector<AstNodePtr<charT>> &basic_globs = concat_node->GetBasicGlobs();
|
||
|
|
||
|
for (auto &basic_glob : basic_globs) {
|
||
|
ExecBasicGlob(basic_glob.get(), automata);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ExecBasicGlob(AstNode<charT> *node, Automata<charT> &automata)
|
||
|
{
|
||
|
switch (node->GetType()) {
|
||
|
case AstNode<charT>::Type::CHAR: ExecChar(node, automata); break;
|
||
|
|
||
|
case AstNode<charT>::Type::ANY: ExecAny(node, automata); break;
|
||
|
|
||
|
case AstNode<charT>::Type::STAR: ExecStar(node, automata); break;
|
||
|
|
||
|
case AstNode<charT>::Type::POS_SET: ExecPositiveSet(node, automata); break;
|
||
|
|
||
|
case AstNode<charT>::Type::NEG_SET: ExecNegativeSet(node, automata); break;
|
||
|
|
||
|
case AstNode<charT>::Type::GROUP: ExecGroup(node, automata); break;
|
||
|
|
||
|
default: break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ExecChar(AstNode<charT> *node, Automata<charT> &automata)
|
||
|
{
|
||
|
CharNode<charT> *char_node = static_cast<CharNode<charT> *>(node);
|
||
|
char c = char_node->GetValue();
|
||
|
NewState<StateChar<charT>>(automata, c);
|
||
|
}
|
||
|
|
||
|
void ExecAny(AstNode<charT> *, Automata<charT> &automata)
|
||
|
{
|
||
|
NewState<StateAny<charT>>(automata);
|
||
|
}
|
||
|
|
||
|
void ExecStar(AstNode<charT> *, Automata<charT> &automata)
|
||
|
{
|
||
|
NewState<StateStar<charT>>(automata);
|
||
|
automata.GetState(current_state_).AddNextState(current_state_);
|
||
|
}
|
||
|
|
||
|
void ExecPositiveSet(AstNode<charT> *node, Automata<charT> &automata)
|
||
|
{
|
||
|
PositiveSetNode<charT> *pos_set_node = static_cast<PositiveSetNode<charT> *>(node);
|
||
|
|
||
|
auto items = ProcessSetItems(pos_set_node->GetSet());
|
||
|
NewState<StateSet<charT>>(automata, std::move(items));
|
||
|
}
|
||
|
|
||
|
void ExecNegativeSet(AstNode<charT> *node, Automata<charT> &automata)
|
||
|
{
|
||
|
NegativeSetNode<charT> *pos_set_node = static_cast<NegativeSetNode<charT> *>(node);
|
||
|
|
||
|
auto items = ProcessSetItems(pos_set_node->GetSet());
|
||
|
NewState<StateSet<charT>>(automata, std::move(items), /*neg*/ true);
|
||
|
}
|
||
|
|
||
|
std::vector<std::unique_ptr<SetItem<charT>>> ProcessSetItems(AstNode<charT> *node)
|
||
|
{
|
||
|
SetItemsNode<charT> *set_node = static_cast<SetItemsNode<charT> *>(node);
|
||
|
std::vector<std::unique_ptr<SetItem<charT>>> vec;
|
||
|
auto &items = set_node->GetItems();
|
||
|
for (auto &item : items) {
|
||
|
vec.push_back(std::move(ProcessSetItem(item.get())));
|
||
|
}
|
||
|
|
||
|
return vec;
|
||
|
}
|
||
|
|
||
|
std::unique_ptr<SetItem<charT>> ProcessSetItem(AstNode<charT> *node)
|
||
|
{
|
||
|
if (node->GetType() == AstNode<charT>::Type::CHAR) {
|
||
|
CharNode<charT> *char_node = static_cast<CharNode<charT> *>(node);
|
||
|
char c = char_node->GetValue();
|
||
|
return std::unique_ptr<SetItem<charT>>(new SetItemChar<charT>(c));
|
||
|
}
|
||
|
else if (node->GetType() == AstNode<charT>::Type::RANGE) {
|
||
|
RangeNode<charT> *range_node = static_cast<RangeNode<charT> *>(node);
|
||
|
CharNode<charT> *start_node = static_cast<CharNode<charT> *>(range_node->GetStart());
|
||
|
|
||
|
CharNode<charT> *end_node = static_cast<CharNode<charT> *>(range_node->GetEnd());
|
||
|
|
||
|
char start_char = start_node->GetValue();
|
||
|
char end_char = end_node->GetValue();
|
||
|
return std::unique_ptr<SetItem<charT>>(new SetItemRange<charT>(start_char, end_char));
|
||
|
}
|
||
|
else {
|
||
|
throw Error("Not valid set item");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void ExecGroup(AstNode<charT> *node, Automata<charT> &automata)
|
||
|
{
|
||
|
GroupNode<charT> *group_node = static_cast<GroupNode<charT> *>(node);
|
||
|
AstNode<charT> *union_node = group_node->GetGlob();
|
||
|
std::vector<std::unique_ptr<Automata<charT>>> automatas = ExecUnion(union_node);
|
||
|
|
||
|
typename StateGroup<charT>::Type state_group_type{};
|
||
|
switch (group_node->GetGroupType()) {
|
||
|
case GroupNode<charT>::GroupType::BASIC:
|
||
|
state_group_type = StateGroup<charT>::Type::BASIC;
|
||
|
break;
|
||
|
|
||
|
case GroupNode<charT>::GroupType::ANY: state_group_type = StateGroup<charT>::Type::ANY; break;
|
||
|
|
||
|
case GroupNode<charT>::GroupType::STAR:
|
||
|
state_group_type = StateGroup<charT>::Type::STAR;
|
||
|
break;
|
||
|
|
||
|
case GroupNode<charT>::GroupType::PLUS:
|
||
|
state_group_type = StateGroup<charT>::Type::PLUS;
|
||
|
break;
|
||
|
|
||
|
case GroupNode<charT>::GroupType::AT: state_group_type = StateGroup<charT>::Type::AT; break;
|
||
|
|
||
|
case GroupNode<charT>::GroupType::NEG: state_group_type = StateGroup<charT>::Type::NEG; break;
|
||
|
}
|
||
|
|
||
|
NewState<StateGroup<charT>>(automata, state_group_type, std::move(automatas));
|
||
|
automata.GetState(current_state_).AddNextState(current_state_);
|
||
|
}
|
||
|
|
||
|
std::vector<std::unique_ptr<Automata<charT>>> ExecUnion(AstNode<charT> *node)
|
||
|
{
|
||
|
UnionNode<charT> *union_node = static_cast<UnionNode<charT> *>(node);
|
||
|
auto &items = union_node->GetItems();
|
||
|
std::vector<std::unique_ptr<Automata<charT>>> vec_automatas;
|
||
|
for (auto &item : items) {
|
||
|
std::unique_ptr<Automata<charT>> automata_ptr(new Automata<charT>);
|
||
|
AstConsumer ast_consumer;
|
||
|
ast_consumer.ExecConcat(item.get(), *automata_ptr);
|
||
|
|
||
|
size_t match_state = automata_ptr->template NewState<StateMatch<charT>>();
|
||
|
automata_ptr->GetState(ast_consumer.preview_state_).AddNextState(match_state);
|
||
|
automata_ptr->SetMatchState(match_state);
|
||
|
|
||
|
size_t fail_state = automata_ptr->template NewState<StateFail<charT>>();
|
||
|
automata_ptr->SetFailState(fail_state);
|
||
|
|
||
|
vec_automatas.push_back(std::move(automata_ptr));
|
||
|
}
|
||
|
|
||
|
return vec_automatas;
|
||
|
}
|
||
|
|
||
|
template <class T, typename... Args> void NewState(Automata<charT> &automata, Args &&...args)
|
||
|
{
|
||
|
current_state_ = automata.template NewState<T>(std::forward<Args>(args)...);
|
||
|
if (preview_state_ >= 0) {
|
||
|
automata.GetState(preview_state_).AddNextState(current_state_);
|
||
|
}
|
||
|
preview_state_ = current_state_;
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
int preview_state_ = -1;
|
||
|
size_t current_state_ = 0;
|
||
|
};
|
||
|
|
||
|
template <class charT> class ExtendedGlob
|
||
|
{
|
||
|
public:
|
||
|
ExtendedGlob(const String<charT> &pattern)
|
||
|
{
|
||
|
Lexer<charT> l(pattern);
|
||
|
std::vector<Token<charT>> tokens = l.Scanner();
|
||
|
Parser<charT> p(std::move(tokens));
|
||
|
AstNodePtr<charT> ast_ptr = p.GenAst();
|
||
|
|
||
|
AstConsumer<charT> ast_consumer;
|
||
|
ast_consumer.GenAutomata(ast_ptr.get(), automata_);
|
||
|
}
|
||
|
|
||
|
ExtendedGlob(const ExtendedGlob &) = delete;
|
||
|
ExtendedGlob &operator=(ExtendedGlob &) = delete;
|
||
|
|
||
|
ExtendedGlob(ExtendedGlob &&glob) : automata_{std::move(glob.automata_)} {}
|
||
|
|
||
|
ExtendedGlob &operator=(ExtendedGlob &&glob)
|
||
|
{
|
||
|
automata_ = std::move(glob.automata_);
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
bool Exec(const String<charT> &str)
|
||
|
{
|
||
|
bool r;
|
||
|
std::tie(r, std::ignore) = automata_.Exec(str);
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
const Automata<charT> &GetAutomata() const { return automata_; }
|
||
|
|
||
|
private:
|
||
|
Automata<charT> automata_;
|
||
|
};
|
||
|
|
||
|
template <class charT> class SimpleGlob
|
||
|
{
|
||
|
public:
|
||
|
SimpleGlob(const String<charT> &pattern) { Parser(pattern); }
|
||
|
|
||
|
SimpleGlob(const SimpleGlob &) = delete;
|
||
|
SimpleGlob &operator=(SimpleGlob &) = delete;
|
||
|
|
||
|
SimpleGlob(SimpleGlob &&glob) : automata_{std::move(glob.automata_)} {}
|
||
|
|
||
|
SimpleGlob &operator=(SimpleGlob &&glob)
|
||
|
{
|
||
|
automata_ = std::move(glob.automata_);
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
void Parser(const String<charT> &pattern)
|
||
|
{
|
||
|
size_t pos = 0;
|
||
|
int preview_state = -1;
|
||
|
|
||
|
while (pos < pattern.length()) {
|
||
|
size_t current_state = 0;
|
||
|
char c = pattern[pos];
|
||
|
switch (c) {
|
||
|
case '?': {
|
||
|
current_state = automata_.template NewState<StateAny<charT>>();
|
||
|
++pos;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
case '*': {
|
||
|
current_state = automata_.template NewState<StateStar<charT>>();
|
||
|
automata_.GetState(current_state).AddNextState(current_state);
|
||
|
++pos;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
default: {
|
||
|
current_state = automata_.template NewState<StateChar<charT>>(c);
|
||
|
++pos;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (preview_state >= 0) {
|
||
|
automata_.GetState(preview_state).AddNextState(current_state);
|
||
|
}
|
||
|
preview_state = current_state;
|
||
|
}
|
||
|
|
||
|
size_t match_state = automata_.template NewState<StateMatch<charT>>();
|
||
|
automata_.GetState(preview_state).AddNextState(match_state);
|
||
|
automata_.SetMatchState(match_state);
|
||
|
|
||
|
size_t fail_state = automata_.template NewState<StateFail<charT>>();
|
||
|
automata_.SetFailState(fail_state);
|
||
|
}
|
||
|
|
||
|
bool Exec(const String<charT> &str) const
|
||
|
{
|
||
|
bool r;
|
||
|
std::tie(r, std::ignore) = automata_.Exec(str);
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
const Automata<charT> &GetAutomata() const { return automata_; }
|
||
|
|
||
|
private:
|
||
|
Automata<charT> automata_;
|
||
|
};
|
||
|
|
||
|
// Lower-case alias for ExtendedGlob<charT>.
template <class charT> using extended_glob = ExtendedGlob<charT>;
|
||
|
|
||
|
// Alias for SimpleGlob<charT>, the implementation that is built directly from
// the pattern text instead of going through Lexer/Parser.
template <class charT> using no_extended_glob = SimpleGlob<charT>;
|
||
|
|
||
|
// Forward declaration: BasicGlob's friend declarations name MatchResults
// before the class is defined.
template <class charT> class MatchResults;
|
||
|
|
||
|
template <class charT, class globT = extended_glob<charT>> class BasicGlob
|
||
|
{
|
||
|
public:
|
||
|
BasicGlob(const String<charT> &pattern) : glob_{pattern} {}
|
||
|
|
||
|
BasicGlob(const BasicGlob &) = delete;
|
||
|
BasicGlob &operator=(BasicGlob &) = delete;
|
||
|
|
||
|
BasicGlob(BasicGlob &&glob) : glob_{std::move(glob.glob_)} {}
|
||
|
|
||
|
BasicGlob &operator=(BasicGlob &&glob)
|
||
|
{
|
||
|
glob_ = std::move(glob.glob_);
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
const Automata<charT> &GetAutomata() const { return glob_.GetAutomata(); }
|
||
|
|
||
|
private:
|
||
|
bool Exec(const String<charT> &str) { return glob_.Exec(str); }
|
||
|
|
||
|
template <class charU, class globU>
|
||
|
friend bool glob_match(const String<charU> &str, BasicGlob<charU, globU> &glob);
|
||
|
|
||
|
template <class charU, class globU>
|
||
|
friend bool glob_match(const charU *str, BasicGlob<charU, globU> &glob);
|
||
|
|
||
|
template <class charU, class globU>
|
||
|
friend bool glob_match(const String<charU> &str, MatchResults<charU> &res,
|
||
|
BasicGlob<charU, globU> &glob);
|
||
|
|
||
|
template <class charU, class globU>
|
||
|
friend bool glob_match(const charU *str, MatchResults<charU> &res,
|
||
|
BasicGlob<charU, globU> &glob);
|
||
|
|
||
|
globT glob_;
|
||
|
};
|
||
|
|
||
|
template <class charT> class MatchResults
|
||
|
{
|
||
|
public:
|
||
|
using const_iterator = typename std::vector<String<charT>>::const_iterator;
|
||
|
|
||
|
MatchResults() = default;
|
||
|
|
||
|
MatchResults(const MatchResults &m) : results_{m.results_} {}
|
||
|
|
||
|
MatchResults(MatchResults &&m) : results_{std::move(m.results_)} {}
|
||
|
|
||
|
MatchResults &operator=(const MatchResults &m)
|
||
|
{
|
||
|
results_ = m.results_;
|
||
|
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
MatchResults &operator=(MatchResults &&m)
|
||
|
{
|
||
|
results_ = std::move(m.results_);
|
||
|
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
bool empty() const { return results_.empty(); }
|
||
|
|
||
|
size_t size() const { return results_.size(); }
|
||
|
|
||
|
const_iterator begin() const noexcept { return results_.begin(); }
|
||
|
|
||
|
const_iterator end() const noexcept { return results_.end(); }
|
||
|
|
||
|
const_iterator cbegin() const noexcept { return results_.cbegin(); }
|
||
|
|
||
|
const_iterator cend() const noexcept { return results_.cend(); }
|
||
|
|
||
|
String<charT> &operator[](size_t n) const { return results_[n]; }
|
||
|
|
||
|
private:
|
||
|
void SetResults(std::vector<String<charT>> &&results) { results_ = std::move(results); }
|
||
|
|
||
|
template <class charU, class globU>
|
||
|
friend bool glob_match(const String<charU> &str, BasicGlob<charU, globU> &glob);
|
||
|
|
||
|
template <class charU, class globU>
|
||
|
friend bool glob_match(const charU *str, BasicGlob<charU, globU> &glob);
|
||
|
|
||
|
template <class charU, class globU>
|
||
|
friend bool glob_match(const String<charU> &str, MatchResults<charU> &res,
|
||
|
BasicGlob<charU, globU> &glob);
|
||
|
|
||
|
template <class charU, class globU>
|
||
|
friend bool glob_match(const charU *str, MatchResults<charU> &res,
|
||
|
BasicGlob<charU, globU> &glob);
|
||
|
|
||
|
std::vector<String<charT>> results_;
|
||
|
};
|
||
|
|
||
|
template <class charT, class globT = extended_glob<charT>>
|
||
|
bool glob_match(const String<charT> &str, BasicGlob<charT, globT> &glob)
|
||
|
{
|
||
|
return glob.Exec(str);
|
||
|
}
|
||
|
|
||
|
template <class charT, class globT = extended_glob<charT>>
|
||
|
bool glob_match(const charT *str, BasicGlob<charT, globT> &glob)
|
||
|
{
|
||
|
return glob.Exec(str);
|
||
|
}
|
||
|
|
||
|
template <class charT, class globT = extended_glob<charT>>
|
||
|
bool glob_match(const String<charT> &str, MatchResults<charT> &res, BasicGlob<charT, globT> &glob)
|
||
|
{
|
||
|
bool r = glob.Exec(str);
|
||
|
res.SetResults(glob.GetAutomata().GetMatchedStrings());
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
template <class charT, class globT = extended_glob<charT>>
|
||
|
bool glob_match(const charT *str, MatchResults<charT> &res, BasicGlob<charT, globT> &glob)
|
||
|
{
|
||
|
bool r = glob.Exec(str);
|
||
|
res.SetResults(glob.GetAutomata().GetMatchedStrings());
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
template <class charT, class globT = extended_glob<charT>>
|
||
|
using basic_glob = BasicGlob<charT, globT>;
|
||
|
|
||
|
// basic_glob over char, using the extended_glob implementation.
using glob = basic_glob<char, extended_glob<char>>;
|
||
|
|
||
|
// basic_glob over wchar_t, using the extended_glob implementation.
using wglob = basic_glob<wchar_t, extended_glob<wchar_t>>;
|
||
|
|
||
|
// MatchResults over char.
using cmatch = MatchResults<char>;
|
||
|
|
||
|
// MatchResults over wchar_t.
using wmatch = MatchResults<wchar_t>;
|
||
|
|
||
|
} // namespace glob
|