// Apache License
// Version 2.0, January 2004
// http://www.apache.org/licenses/
// https://github.com/alexst07/glob-cpp
#pragma once

#include <cstddef>
#include <exception>
#include <memory>
#include <ostream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

namespace glob {

/// String type used for patterns and subjects.
template <class charT>
using String = std::basic_string<charT>;

template <class charT>
class Automata;

/// Exception thrown when a pattern cannot be tokenized or parsed.
class Error : public std::exception {
 public:
  Error(const std::string &msg) : msg_{msg} {}

  const char *what() const noexcept override { return msg_.c_str(); }

 private:
  std::string msg_;
};

/// Kind tag carried by every automaton state.
enum class StateType {
  MATCH,
  FAIL,
  CHAR,
  QUESTION,
  MULT,
  SET,
  GROUP,
  UNION,
};

/// Replaces `obj` with `new_value` and returns the previous value
/// (C++11 stand-in for std::exchange).
template <class T, class U = T>
T exchange(T &obj, U &&new_value) {
  T old_value = std::move(obj);
  obj = std::forward<U>(new_value);
  return old_value;
}

/// Base class of every automaton state.
///
/// A state knows the automaton that owns it, the indexes of its successor
/// states, and the piece of input it matched during the last execution.
template <class charT>
class State {
 public:
  State(StateType type, Automata<charT> &states)
      : type_(type), states_(&states) {}

  virtual ~State() = default;

  /// Tells whether this state accepts the input at `pos` without consuming it.
  virtual bool Check(const String<charT> &str, size_t pos) = 0;

  /// Consumes input at `pos`; returns (next state index, next input position).
  virtual std::tuple<size_t, size_t> Next(const String<charT> &str,
                                          size_t pos) = 0;

  StateType Type() const { return type_; }

  Automata<charT> &GetAutomata() { return *states_; }

  void AddNextState(size_t state_pos) { next_states_.push_back(state_pos); }

  const std::vector<size_t> &GetNextStates() const { return next_states_; }

  const String<charT> &MatchedStr() const { return matched_str_; }

  /// Hook called after each execution so states can drop per-run flags.
  virtual void ResetState() {}

 protected:
  void SetMatchedStr(const String<charT> &str) { matched_str_ = str; }

  void SetMatchedStr(charT c) { matched_str_ = c; }

 private:
  // The owning automaton rebinds states_ when it is moved and clears
  // matched_str_ before each run.
  friend class Automata<charT>;

  StateType type_;
  // Pointer (not reference) so that the owner can be re-targeted after a move.
  Automata<charT> *states_;
  std::vector<size_t> next_states_;
  String<charT> matched_str_;
};

/// Terminal state reached when the input cannot match.
template <class charT>
class StateFail : public State<charT> {
 public:
  StateFail(Automata<charT> &states)
      : State<charT>(StateType::FAIL, states) {}

  bool Check(const String<charT> &, size_t) override { return false; }

  std::tuple<size_t, size_t> Next(const String<charT> &,
                                  size_t pos) override {
    return std::tuple<size_t, size_t>(0, ++pos);
  }
};

/// Terminal state reached when the pattern has been fully satisfied.
template <class charT>
class StateMatch : public State<charT> {
 public:
  StateMatch(Automata<charT> &states)
      : State<charT>(StateType::MATCH, states) {}

  bool Check(const String<charT> &, size_t) override { return true; }

  std::tuple<size_t, size_t> Next(const String<charT> &,
                                  size_t pos) override {
    return std::tuple<size_t, size_t>(0, ++pos);
  }
};

/// Owns the states of a compiled pattern and runs them against input.
///
/// State 0 is the start state. States refer to each other by index.
template <class charT>
class Automata {
 public:
  Automata() = default;

  Automata(const Automata<charT> &) = delete;

  Automata<charT> &operator=(const Automata<charT> &automata) = delete;

  Automata(Automata<charT> &&automata) noexcept
      : fail_state_{exchange(automata.fail_state_, 0)},
        states_{std::move(automata.states_)},
        match_state_{automata.match_state_},
        start_state_{exchange(automata.start_state_, 0)} {
    // The states were created pointing at `automata`; re-target them.
    RebindStates();
  }

  Automata<charT> &operator=(Automata<charT> &&automata) noexcept {
    if (this != &automata) {
      states_ = std::move(automata.states_);
      match_state_ = automata.match_state_;
      fail_state_ = automata.fail_state_;
      start_state_ = automata.start_state_;
      RebindStates();
    }
    return *this;
  }

  const State<charT> &GetState(size_t pos) const { return *states_[pos]; }

  State<charT> &GetState(size_t pos) { return *states_[pos]; }

  size_t FailState() const { return fail_state_; }

  Automata<charT> &SetFailState(size_t state_pos) {
    fail_state_ = state_pos;
    return *this;
  }

  Automata<charT> &SetMatchState(size_t state_pos) {
    match_state_ = state_pos;
    return *this;
  }

  size_t GetNumStates() const { return states_.size(); }

  /// Runs the automaton over `str`.
  ///
  /// @param comp_end when true the whole string must be consumed to match;
  ///        when false reaching the match state is enough.
  /// @return (matched, input position where execution stopped).
  std::tuple<bool, size_t> Exec(const String<charT> &str,
                                bool comp_end = true) {
    // Captures from a previous run must not leak into this one.
    ClearMatchedStrings();
    auto r = ExecAux(str, comp_end);
    ResetStates();
    return r;
  }

  /// Returns the text captured by every star, question, group and set state,
  /// in state order, as recorded by the last execution.
  std::vector<String<charT>> GetMatchedStrings() const {
    std::vector<String<charT>> vec;

    for (auto &state : states_) {
      const StateType type = state->Type();
      if (type == StateType::MULT || type == StateType::QUESTION ||
          type == StateType::GROUP || type == StateType::SET) {
        vec.push_back(state->MatchedStr());
      }
    }

    return vec;
  }

  /// Appends a new state of type T (constructed with this automaton followed
  /// by `args`) and returns its index.
  template <class T, typename... Args>
  size_t NewState(Args &&...args) {
    size_t state_pos = states_.size();
    auto state = std::unique_ptr<State<charT>>(
        new T(*this, std::forward<Args>(args)...));

    states_.push_back(std::move(state));
    return state_pos;
  }

  size_t fail_state_ = 0;

 private:
  std::tuple<bool, size_t> ExecAux(const String<charT> &str,
                                   bool comp_end = true) const {
    size_t state_pos = 0;
    size_t str_pos = 0;

    // run the state vector until state reaches fail or match state, or
    // until the string is all consumed
    while (state_pos != fail_state_ && state_pos != match_state_ &&
           str_pos < str.length()) {
      std::tie(state_pos, str_pos) = states_[state_pos]->Next(str, str_pos);
    }

    // The input may run out while states that accept the empty string are
    // still pending (e.g. the star of "a*" against "a"); step over them.
    if (str_pos == str.length()) {
      state_pos = SkipNullableStates(str, state_pos);
    }

    const bool reached_match = (state_pos == match_state_);

    // if comp_end is true it matches only if the automata reached the end of
    // the string; otherwise reaching the match state is enough
    if (comp_end) {
      return std::tuple<bool, size_t>(
          reached_match && (str_pos == str.length()), str_pos);
    }

    return std::tuple<bool, size_t>(reached_match, str_pos);
  }

  // With the input exhausted, follows states that can match nothing: a star
  // goes straight to its successor, a group is asked what it does at the end
  // of the input. Stops at the first state that needs a character.
  size_t SkipNullableStates(const String<charT> &str, size_t state_pos) const {
    const size_t end = str.length();

    // Every step moves to a different state, so states_.size() iterations
    // are always enough; the bound is only a safety net.
    for (size_t step = 0; step < states_.size(); ++step) {
      if (state_pos == fail_state_ || state_pos == match_state_ ||
          state_pos >= states_.size()) {
        break;
      }

      State<charT> &state = *states_[state_pos];
      size_t next_state = state_pos;

      if (state.Type() == StateType::MULT) {
        next_state = state.GetNextStates()[1];
      } else if (state.Type() == StateType::GROUP) {
        size_t next_pos = end;
        std::tie(next_state, next_pos) = state.Next(str, end);
        if (next_pos != end) {
          break;
        }
      } else {
        break;
      }

      if (next_state == state_pos) {
        break;
      }
      state_pos = next_state;
    }

    return state_pos;
  }

  void ResetStates() {
    for (auto &state : states_) {
      state->ResetState();
    }
  }

  void ClearMatchedStrings() {
    for (auto &state : states_) {
      state->matched_str_.clear();
    }
  }

  void RebindStates() {
    for (auto &state : states_) {
      state->states_ = this;
    }
  }

  std::vector<std::unique_ptr<State<charT>>> states_;
  size_t match_state_ = 0;
  size_t start_state_ = 0;
};

/// Matches exactly one given character.
template <class charT>
class StateChar : public State<charT> {
  using State<charT>::GetNextStates;
  using State<charT>::GetAutomata;

 public:
  StateChar(Automata<charT> &states, charT c)
      : State<charT>(StateType::CHAR, states), c_{c} {}

  bool Check(const String<charT> &str, size_t pos) override {
    return (c_ == str[pos]);
  }

  std::tuple<size_t, size_t> Next(const String<charT> &str,
                                  size_t pos) override {
    if (c_ == str[pos]) {
      this->SetMatchedStr(c_);
      return std::tuple<size_t, size_t>(GetNextStates()[0], pos + 1);
    }

    return std::tuple<size_t, size_t>(GetAutomata().FailState(), pos + 1);
  }

 private:
  charT c_;
};

/// Matches any single character (`?`).
template <class charT>
class StateAny : public State<charT> {
  using State<charT>::GetNextStates;
  using State<charT>::GetAutomata;

 public:
  StateAny(Automata<charT> &states)
      : State<charT>(StateType::QUESTION, states) {}

  bool Check(const String<charT> &, size_t) override {
    // it matches any char, so it is always true
    return true;
  }

  std::tuple<size_t, size_t> Next(const String<charT> &str,
                                  size_t pos) override {
    this->SetMatchedStr(str[pos]);
    // state any always matches with any char
    return std::tuple<size_t, size_t>(GetNextStates()[0], pos + 1);
  }
};

/// Matches any run of characters (`*`).
template <class charT>
class StateStar : public State<charT> {
  using State<charT>::GetNextStates;
  using State<charT>::GetAutomata;

 public:
  StateStar(Automata<charT> &states)
      : State<charT>(StateType::MULT, states) {}

  bool Check(const String<charT> &, size_t) override {
    // it matches any char, so it is always true
    return true;
  }

  std::tuple<size_t, size_t> Next(const String<charT> &str,
                                  size_t pos) override {
    // next state vector from StateStar has two elements, the element 0 points
    // to the same state, and the element 1 points to the next state if the
    // condition is satisfied
    const size_t following = GetNextStates()[1];

    if (GetAutomata().GetState(following).Type() == StateType::MATCH) {
      // this case occurs when star is in the end of the glob, so the pos is
      // the end of the string, because all string is consumed
      this->SetMatchedStr(str.substr(pos));
      return std::tuple<size_t, size_t>(following, str.length());
    }

    // if the next state is satisfied goes to next state
    if (GetAutomata().GetState(following).Check(str, pos)) {
      return std::tuple<size_t, size_t>(following, pos);
    }

    // while the next state check is false, the string is consumed by star
    this->SetMatchedStr(this->MatchedStr() + str[pos]);
    return std::tuple<size_t, size_t>(GetNextStates()[0], pos + 1);
  }
};

/// One element of a bracket set.
template <class charT>
class SetItem {
 public:
  SetItem() = default;
  virtual ~SetItem() = default;

  virtual bool Check(charT c) const = 0;
};

/// Bracket-set element matching a single character.
template <class charT>
class SetItemChar : public SetItem<charT> {
 public:
  SetItemChar(charT c) : c_{c} {}

  bool Check(charT c) const override { return c == c_; }

 private:
  charT c_;
};

/// Bracket-set element matching an inclusive character range; the bounds
/// are accepted in either order.
template <class charT>
class SetItemRange : public SetItem<charT> {
 public:
  SetItemRange(charT start, charT end)
      : start_{start < end ? start : end}, end_{start < end ? end : start} {}

  bool Check(charT c) const override { return (c >= start_) && (c <= end_); }

 private:
  charT start_;
  charT end_;
};

/// Matches one character against a (possibly negated) bracket set.
template <class charT>
class StateSet : public State<charT> {
  using State<charT>::GetNextStates;
  using State<charT>::GetAutomata;

 public:
  StateSet(Automata<charT> &states,
           std::vector<std::unique_ptr<SetItem<charT>>> items,
           bool neg = false)
      : State<charT>(StateType::SET, states),
        items_{std::move(items)},
        neg_{neg} {}

  /// True when any item of the set accepts the character at `pos`.
  bool SetCheck(const String<charT> &str, size_t pos) const {
    for (auto &item : items_) {
      // if any item match, then the set match with char
      if (item->Check(str[pos])) {
        return true;
      }
    }

    return false;
  }

  bool Check(const String<charT> &str, size_t pos) override {
    const bool in_set = SetCheck(str, pos);
    return neg_ ? !in_set : in_set;
  }

  std::tuple<size_t, size_t> Next(const String<charT> &str,
                                  size_t pos) override {
    if (Check(str, pos)) {
      this->SetMatchedStr(str[pos]);
      return std::tuple<size_t, size_t>(GetNextStates()[0], pos + 1);
    }

    return std::tuple<size_t, size_t>(GetAutomata().FailState(), pos + 1);
  }

 private:
  std::vector<std::unique_ptr<SetItem<charT>>> items_;
  bool neg_;
};

/// Extended-glob group: `(...)`, `?(...)`, `*(...)`, `+(...)`, `!(...)`,
/// `@(...)`. Each alternative of the group is its own sub-automaton.
///
/// Successor 0 is the group itself, successor 1 is the following state.
template <class charT>
class StateGroup : public State<charT> {
  using State<charT>::GetNextStates;
  using State<charT>::GetAutomata;

 public:
  enum class Type { BASIC, ANY, STAR, PLUS, NEG, AT };

  StateGroup(Automata<charT> &states, Type type,
             std::vector<std::unique_ptr<Automata<charT>>> &&automatas)
      : State<charT>(StateType::GROUP, states),
        type_{type},
        automatas_{std::move(automatas)},
        match_one_{false} {}

  void ResetState() override { match_one_ = false; }

  /// Tries each alternative on the input starting at `pos`.
  /// @return (true, position after the first alternative that matched) or
  ///         (false, position where the last alternative stopped).
  std::tuple<bool, size_t> BasicCheck(const String<charT> &str, size_t pos) {
    String<charT> str_part = str.substr(pos);
    bool r;
    size_t str_pos = 0;

    // each automata is a part of a union of the group, in basic check,
    // we want find only if any automata is true
    for (auto &automata : automatas_) {
      std::tie(r, str_pos) = automata->Exec(str_part, false);
      if (r) {
        return std::tuple<bool, size_t>(r, pos + str_pos);
      }
    }

    return std::tuple<bool, size_t>(false, pos + str_pos);
  }

  bool Check(const String<charT> &str, size_t pos) override {
    // every group type answers Check the same way
    return std::get<0>(BasicCheck(str, pos));
  }

  std::tuple<size_t, size_t> Next(const String<charT> &str,
                                  size_t pos) override {
    // STATE 1 -> is the next state
    // STATE 0 -> is the same state
    switch (type_) {
      case Type::BASIC:
      case Type::AT:
        return NextBasic(str, pos);

      case Type::ANY:
        return NextAny(str, pos);

      case Type::STAR:
        return NextStar(str, pos);

      case Type::PLUS:
        return NextPlus(str, pos);

      case Type::NEG:
        return NextNeg(str, pos);
    }

    return std::tuple<size_t, size_t>(0, 0);
  }

  /// `!(...)`: fails when an alternative matches, otherwise moves on without
  /// consuming input.
  std::tuple<size_t, size_t> NextNeg(const String<charT> &str, size_t pos) {
    bool r;
    size_t new_pos;
    std::tie(r, new_pos) = BasicCheck(str, pos);
    if (r) {
      AppendMatched(str, pos, new_pos);
      return std::tuple<size_t, size_t>(GetAutomata().FailState(), new_pos);
    }

    return std::tuple<size_t, size_t>(GetNextStates()[1], pos);
  }

  /// `(...)` / `@(...)`: exactly one alternative must match.
  std::tuple<size_t, size_t> NextBasic(const String<charT> &str, size_t pos) {
    bool r;
    size_t new_pos;
    std::tie(r, new_pos) = BasicCheck(str, pos);
    if (r) {
      AppendMatched(str, pos, new_pos);
      return std::tuple<size_t, size_t>(GetNextStates()[1], new_pos);
    }

    return std::tuple<size_t, size_t>(GetAutomata().FailState(), new_pos);
  }

  /// `?(...)`: zero or one occurrence.
  std::tuple<size_t, size_t> NextAny(const String<charT> &str, size_t pos) {
    bool r;
    size_t new_pos;
    std::tie(r, new_pos) = BasicCheck(str, pos);
    if (r) {
      AppendMatched(str, pos, new_pos);
      return std::tuple<size_t, size_t>(GetNextStates()[1], new_pos);
    }

    return std::tuple<size_t, size_t>(GetNextStates()[1], pos);
  }

  /// `*(...)`: zero or more occurrences.
  std::tuple<size_t, size_t> NextStar(const String<charT> &str, size_t pos) {
    bool r;
    size_t new_pos;
    std::tie(r, new_pos) = BasicCheck(str, pos);

    // A zero-width match makes no progress; looping on it would never end,
    // so it is treated like "no further occurrence".
    if (r && new_pos > pos) {
      AppendMatched(str, pos, new_pos);
      return std::tuple<size_t, size_t>(AfterRepetition(str, new_pos),
                                        new_pos);
    }

    return std::tuple<size_t, size_t>(GetNextStates()[1], pos);
  }

  /// `+(...)`: one or more occurrences.
  std::tuple<size_t, size_t> NextPlus(const String<charT> &str, size_t pos) {
    bool r;
    size_t new_pos;
    std::tie(r, new_pos) = BasicCheck(str, pos);
    if (r) {
      match_one_ = true;

      // zero-width occurrence: counts as a match but must not be repeated
      if (new_pos == pos) {
        return std::tuple<size_t, size_t>(GetNextStates()[1], pos);
      }

      AppendMatched(str, pos, new_pos);
      return std::tuple<size_t, size_t>(AfterRepetition(str, new_pos),
                                        new_pos);
    }

    // the group already matched one time -> goes to next state
    if (match_one_) {
      return std::tuple<size_t, size_t>(GetNextStates()[1], pos);
    }

    return std::tuple<size_t, size_t>(GetAutomata().FailState(), new_pos);
  }

 private:
  void AppendMatched(const String<charT> &str, size_t from, size_t to) {
    this->SetMatchedStr(this->MatchedStr() + str.substr(from, to - from));
  }

  // After a successful repetition: if the string reached the end and the
  // next state is the match state, go there directly; otherwise stay in the
  // group to try another repetition.
  size_t AfterRepetition(const String<charT> &str, size_t new_pos) {
    const size_t following = GetNextStates()[1];
    if (GetAutomata().GetState(following).Type() == StateType::MATCH &&
        new_pos == str.length()) {
      return following;
    }
    return GetNextStates()[0];
  }

  Type type_{};
  std::vector<std::unique_ptr<Automata<charT>>> automatas_;
  bool match_one_;
};

#define TOKEN(X, Y) X,
enum class TokenKind {
  UNKNOWN = 0,
  CHAR,
  TOKEN(EOS, "end of source")
  TOKEN(SUB, "-")
  TOKEN(STAR, "*")
  TOKEN(QUESTION, "?")
  TOKEN(LPAREN, "(")
  TOKEN(QUESTLPAREN, "?(")
  TOKEN(STARLPAREN, "*(")
  TOKEN(PLUSLPAREN, "+(")
  TOKEN(NEGLPAREN, "!(")
  TOKEN(ATLPAREN, "@(")
  TOKEN(RPAREN, ")")
  TOKEN(UNION, "|")
  TOKEN(LBRACKET, "[")
  TOKEN(RBRACKET, "]")
  TOKEN(NEGLBRACKET, "[^")
  NUM_TOKENS
};
#undef TOKEN

#define TOKEN(X, Y) #X,
static const char *token_name_str[] = {
  "UNKNOWN",  // UNKNOWN
  "CHAR",
  TOKEN(EOS, "end of source")
  TOKEN(SUB, "-")
  TOKEN(STAR, "*")
  TOKEN(QUESTION, "?")
  TOKEN(LPAREN, "(")
  TOKEN(QUESTLPAREN, "?(")
  TOKEN(STARLPAREN, "*(")
  TOKEN(PLUSLPAREN, "+(")
  TOKEN(NEGLPAREN, "!(")
  TOKEN(ATLPAREN, "@(")
  TOKEN(RPAREN, ")")
  TOKEN(UNION, "|")
  TOKEN(LBRACKET, "[")
  TOKEN(RBRACKET, "]")
  TOKEN(NEGLBRACKET, "[^")
  ""
};
#undef TOKEN

/// Lexical token: a kind plus, for CHAR tokens, the character value.
template <class charT>
class Token {
 public:
  Token(TokenKind kind) : kind_{kind} {}
  Token(TokenKind kind, charT value) : kind_{kind}, value_{value} {}

  TokenKind Kind() const { return kind_; }

  charT Value() const { return value_; }

  bool operator==(TokenKind kind) const { return kind_ == kind; }

  bool operator!=(TokenKind kind) const { return kind_ != kind; }

 private:
  template <class charU>
  friend std::ostream &operator<<(std::ostream &stream,
                                  const Token<charU> &token);

  TokenKind kind_;
  charT value_{};
};

/// Prints the token kind name in brackets, e.g. `[STAR]`.
template <class charT>
inline std::ostream &operator<<(std::ostream &stream,
                                const Token<charT> &token) {
  stream << '[' << token_name_str[static_cast<int>(token.kind_)] << ']';
  return stream;
}

/// Splits a pattern into tokens.
template <class charT>
class Lexer {
 public:
  static const char kEndOfInput = -1;

  Lexer(const String<charT> &str)
      : str_(str),
        pos_{0},
        c_{str.empty() ? static_cast<charT>(kEndOfInput) : str[0]},
        eof_{str.empty()} {}

  /// Tokenizes the whole pattern; the last token is always EOS.
  /// @throws Error when the pattern ends right after a backslash.
  std::vector<Token<charT>> Scanner() {
    std::vector<Token<charT>> tokens;

    while (true) {
      // End of input is tracked with a flag rather than by comparing c_
      // with kEndOfInput, so a real 0xFF character is not mistaken for it.
      if (eof_) {
        tokens.push_back(Select(TokenKind::EOS));
        return tokens;
      }

      switch (c_) {
        case '?': {
          Advance();
          tokens.push_back(Select(ConsumeIf('(') ? TokenKind::QUESTLPAREN
                                                 : TokenKind::QUESTION));
          break;
        }

        case '*': {
          Advance();
          tokens.push_back(Select(ConsumeIf('(') ? TokenKind::STARLPAREN
                                                 : TokenKind::STAR));
          break;
        }

        case '+': {
          Advance();
          if (ConsumeIf('(')) {
            tokens.push_back(Select(TokenKind::PLUSLPAREN));
          } else {
            tokens.push_back(Select(TokenKind::CHAR, '+'));
          }
          break;
        }

        case '-': {
          tokens.push_back(Select(TokenKind::SUB));
          Advance();
          break;
        }

        case '|': {
          tokens.push_back(Select(TokenKind::UNION));
          Advance();
          break;
        }

        case '@': {
          Advance();
          if (ConsumeIf('(')) {
            tokens.push_back(Select(TokenKind::ATLPAREN));
          } else {
            tokens.push_back(Select(TokenKind::CHAR, '@'));
          }
          break;
        }

        case '!': {
          Advance();
          if (ConsumeIf('(')) {
            tokens.push_back(Select(TokenKind::NEGLPAREN));
          } else {
            tokens.push_back(Select(TokenKind::CHAR, '!'));
          }
          break;
        }

        case '(': {
          tokens.push_back(Select(TokenKind::LPAREN));
          Advance();
          break;
        }

        case ')': {
          tokens.push_back(Select(TokenKind::RPAREN));
          Advance();
          break;
        }

        case '[': {
          Advance();
          tokens.push_back(Select(ConsumeIf('!') ? TokenKind::NEGLBRACKET
                                                 : TokenKind::LBRACKET));
          break;
        }

        case ']': {
          tokens.push_back(Select(TokenKind::RBRACKET));
          Advance();
          break;
        }

        case '\\': {
          Advance();
          if (eof_) {
            throw Error("No valid char after '\\'");
          } else if (IsSpecialChar(c_)) {
            tokens.push_back(Select(TokenKind::CHAR, c_));
            Advance();
          }
          // a backslash before an ordinary char is dropped; the char itself
          // is tokenized by the next loop iteration
          break;
        }

        default: {
          tokens.push_back(Select(TokenKind::CHAR, c_));
          Advance();
        }
      }
    }
  }

 private:
  inline Token<charT> Select(TokenKind k) { return Token<charT>(k); }

  inline Token<charT> Select(TokenKind k, charT value) {
    return Token<charT>(k, value);
  }

  // Moves to the next character, or flags the end of the pattern.
  void Advance() {
    // `pos_ + 1 >= length()` (instead of `pos_ == length() - 1`) stays
    // correct for an empty pattern, where length() - 1 would wrap around.
    if (eof_ || pos_ + 1 >= str_.length()) {
      c_ = kEndOfInput;
      eof_ = true;
      return;
    }

    c_ = str_[++pos_];
  }

  // Consumes the current character when it equals `expected`.
  bool ConsumeIf(charT expected) {
    if (eof_ || c_ != expected) {
      return false;
    }
    Advance();
    return true;
  }

  inline bool IsSpecialChar(charT c) {
    bool b = c == '?' || c == '*' || c == '+' || c == '(' || c == ')' ||
             c == '[' || c == ']' || c == '|' || c == '!' || c == '@' ||
             c == '\\';
    return b;
  }

  String<charT> str_;
  size_t pos_;
  charT c_;
  bool eof_;
};

#define GLOB_AST_NODE_LIST(V) \
  V(CharNode)                 \
  V(RangeNode)                \
  V(SetItemsNode)             \
  V(PositiveSetNode)          \
  V(NegativeSetNode)          \
  V(StarNode)                 \
  V(AnyNode)                  \
  V(GroupNode)                \
  V(ConcatNode)               \
  V(UnionNode)                \
  V(GlobNode)

template <class charT>
class AstVisitor;

// declare all classes used for nodes
#define DECLARE_TYPE_CLASS(type) \
  template <class charT>         \
  class type;
GLOB_AST_NODE_LIST(DECLARE_TYPE_CLASS)
#undef DECLARE_TYPE_CLASS

/// Base class of the pattern syntax tree.
template <class charT>
class AstNode {
 public:
  enum class Type {
    CHAR,
    RANGE,
    SET_ITEM,
    SET_ITEMS,
    POS_SET,
    NEG_SET,
    SET,
    STAR,
    ANY,
    GROUP,
    CONCAT_GLOB,
    UNION,
    GLOB
  };

  virtual ~AstNode() = default;

  Type GetType() const { return type_; }

  virtual void Accept(AstVisitor<charT> *visitor) = 0;

 protected:
  AstNode(Type type) : type_{type} {}

 private:
  Type type_;
};

template <class charT>
using AstNodePtr = std::unique_ptr<AstNode<charT>>;

/// Visitor over the syntax tree; every hook defaults to a no-op.
template <class charT>
class AstVisitor {
 public:
  virtual ~AstVisitor() = default;

// define all visitor methods for the nodes
#define DECLARE_VIRTUAL_FUNC(type) \
  virtual void Visit##type(type<charT> * /*node*/) {}
  GLOB_AST_NODE_LIST(DECLARE_VIRTUAL_FUNC)
#undef DECLARE_VIRTUAL_FUNC
};

/// Literal character.
template <class charT>
class CharNode : public AstNode<charT> {
 public:
  CharNode(charT c) : AstNode<charT>(AstNode<charT>::Type::CHAR), c_{c} {}

  void Accept(AstVisitor<charT> *visitor) override {
    visitor->VisitCharNode(this);
  }

  // Returns charT (not char) so wide characters are not truncated.
  charT GetValue() const { return c_; }

 private:
  charT c_;
};

/// Character range inside a bracket set (`a-z`).
template <class charT>
class RangeNode : public AstNode<charT> {
 public:
  RangeNode(AstNodePtr<charT> &&start, AstNodePtr<charT> &&end)
      : AstNode<charT>(AstNode<charT>::Type::RANGE),
        start_{std::move(start)},
        end_{std::move(end)} {}

  void Accept(AstVisitor<charT> *visitor) override {
    visitor->VisitRangeNode(this);
  }

  AstNode<charT> *GetStart() const { return start_.get(); }

  AstNode<charT> *GetEnd() const { return end_.get(); }

 private:
  AstNodePtr<charT> start_;
  AstNodePtr<charT> end_;
};

/// Items of a bracket set.
template <class charT>
class SetItemsNode : public AstNode<charT> {
 public:
  SetItemsNode(std::vector<AstNodePtr<charT>> &&items)
      : AstNode<charT>(AstNode<charT>::Type::SET_ITEMS),
        items_{std::move(items)} {}

  void Accept(AstVisitor<charT> *visitor) override {
    visitor->VisitSetItemsNode(this);
  }

  std::vector<AstNodePtr<charT>> &GetItems() { return items_; }

 private:
  std::vector<AstNodePtr<charT>> items_;
};

/// Bracket set `[...]`.
template <class charT>
class PositiveSetNode : public AstNode<charT> {
 public:
  PositiveSetNode(AstNodePtr<charT> &&set)
      : AstNode<charT>(AstNode<charT>::Type::POS_SET),
        set_{std::move(set)} {}

  void Accept(AstVisitor<charT> *visitor) override {
    visitor->VisitPositiveSetNode(this);
  }

  AstNode<charT> *GetSet() { return set_.get(); }

 private:
  AstNodePtr<charT> set_;
};

/// Negated bracket set `[!...]`.
template <class charT>
class NegativeSetNode : public AstNode<charT> {
 public:
  NegativeSetNode(AstNodePtr<charT> &&set)
      : AstNode<charT>(AstNode<charT>::Type::NEG_SET),
        set_{std::move(set)} {}

  void Accept(AstVisitor<charT> *visitor) override {
    visitor->VisitNegativeSetNode(this);
  }

  AstNode<charT> *GetSet() { return set_.get(); }

 private:
  AstNodePtr<charT> set_;
};

/// `*`.
template <class charT>
class StarNode : public AstNode<charT> {
 public:
  StarNode() : AstNode<charT>(AstNode<charT>::Type::STAR) {}

  void Accept(AstVisitor<charT> *visitor) override {
    visitor->VisitStarNode(this);
  }
};

/// `?`.
template <class charT>
class AnyNode : public AstNode<charT> {
 public:
  AnyNode() : AstNode<charT>(AstNode<charT>::Type::ANY) {}

  void Accept(AstVisitor<charT> *visitor) override {
    visitor->VisitAnyNode(this);
  }
};

/// Extended-glob group with its union of alternatives.
template <class charT>
class GroupNode : public AstNode<charT> {
 public:
  enum class GroupType { BASIC, ANY, STAR, PLUS, NEG, AT };

  GroupNode(GroupType group_type, AstNodePtr<charT> &&glob)
      : AstNode<charT>(AstNode<charT>::Type::GROUP),
        glob_{std::move(glob)},
        group_type_{group_type} {}

  void Accept(AstVisitor<charT> *visitor) override {
    visitor->VisitGroupNode(this);
  }

  AstNode<charT> *GetGlob() { return glob_.get(); }

  GroupType GetGroupType() const { return group_type_; }

 private:
  AstNodePtr<charT> glob_;
  GroupType group_type_;
};

/// Sequence of basic globs.
template <class charT>
class ConcatNode : public AstNode<charT> {
 public:
  ConcatNode(std::vector<AstNodePtr<charT>> &&basic_glob)
      : AstNode<charT>(AstNode<charT>::Type::CONCAT_GLOB),
        basic_glob_{std::move(basic_glob)} {}

  void Accept(AstVisitor<charT> *visitor) override {
    visitor->VisitConcatNode(this);
  }

  std::vector<AstNodePtr<charT>> &GetBasicGlobs() { return basic_glob_; }

 private:
  std::vector<AstNodePtr<charT>> basic_glob_;
};

/// Alternatives separated by `|`.
template <class charT>
class UnionNode : public AstNode<charT> {
 public:
  UnionNode(std::vector<AstNodePtr<charT>> &&items)
      : AstNode<charT>(AstNode<charT>::Type::UNION),
        items_{std::move(items)} {}

  void Accept(AstVisitor<charT> *visitor) override {
    visitor->VisitUnionNode(this);
  }

  std::vector<AstNodePtr<charT>> &GetItems() { return items_; }

 private:
  std::vector<AstNodePtr<charT>> items_;
};

/// Root of the syntax tree.
template <class charT>
class GlobNode : public AstNode<charT> {
 public:
  GlobNode(AstNodePtr<charT> &&glob)
      : AstNode<charT>(AstNode<charT>::Type::GLOB), glob_{std::move(glob)} {}

  void Accept(AstVisitor<charT> *visitor) override {
    visitor->VisitGlobNode(this);
  }

  AstNode<charT> *GetConcat() { return glob_.get(); }

 private:
  AstNodePtr<charT> glob_;
};

/// Recursive-descent parser turning a token vector into a syntax tree.
template <class charT>
class Parser {
 public:
  Parser() = delete;

  Parser(std::vector<Token<charT>> &&tok_vec)
      : tok_vec_{std::move(tok_vec)}, pos_{0} {}

  /// Parses the whole token vector.
  /// @throws Error on malformed patterns.
  AstNodePtr<charT> GenAst() { return ParserGlob(); }

 private:
  AstNodePtr<charT> ParserChar() {
    Token<charT> &tk = NextToken();
    if (tk != TokenKind::CHAR) {
      throw Error("char expected");
    }

    charT c = tk.Value();
    return AstNodePtr<charT>(new CharNode<charT>(c));
  }

  AstNodePtr<charT> ParserRange() {
    AstNodePtr<charT> char_start = ParserChar();

    Token<charT> &tk = NextToken();
    if (tk != TokenKind::SUB) {
      throw Error("range expected");
    }

    AstNodePtr<charT> char_end = ParserChar();
    return AstNodePtr<charT>(
        new RangeNode<charT>(std::move(char_start), std::move(char_end)));
  }

  AstNodePtr<charT> ParserSetItem() {
    if (PeekAhead() == TokenKind::SUB) {
      return ParserRange();
    }

    return ParserChar();
  }

  AstNodePtr<charT> ParserSetItems() {
    std::vector<AstNodePtr<charT>> items;

    do {
      items.push_back(ParserSetItem());
    } while (GetToken() != TokenKind::RBRACKET);

    // consume the closing bracket
    Advance();

    return AstNodePtr<charT>(new SetItemsNode<charT>(std::move(items)));
  }

  AstNodePtr<charT> ParserSet() {
    Token<charT> &tk = NextToken();

    if (tk == TokenKind::LBRACKET) {
      return AstNodePtr<charT>(new PositiveSetNode<charT>(ParserSetItems()));
    } else if (tk == TokenKind::NEGLBRACKET) {
      return AstNodePtr<charT>(new NegativeSetNode<charT>(ParserSetItems()));
    } else {
      throw Error("set expected");
    }
  }

  AstNodePtr<charT> ParserBasicGlob() {
    Token<charT> &tk = GetToken();

    switch (tk.Kind()) {
      case TokenKind::QUESTION:
        Advance();
        return AstNodePtr<charT>(new AnyNode<charT>());

      case TokenKind::STAR:
        Advance();
        return AstNodePtr<charT>(new StarNode<charT>());

      case TokenKind::SUB:
        // outside a set, '-' is an ordinary character
        Advance();
        return AstNodePtr<charT>(new CharNode<charT>('-'));

      case TokenKind::CHAR:
        return ParserChar();

      case TokenKind::LBRACKET:
      case TokenKind::NEGLBRACKET:
        return ParserSet();

      case TokenKind::LPAREN:
      case TokenKind::QUESTLPAREN:
      case TokenKind::STARLPAREN:
      case TokenKind::PLUSLPAREN:
      case TokenKind::NEGLPAREN:
      case TokenKind::ATLPAREN:
        return ParserGroup();

      default:
        throw Error("basic glob expected");
    }
  }

  AstNodePtr<charT> ParserGroup() {
    typename GroupNode<charT>::GroupType type;
    Token<charT> &open_tk = NextToken();

    switch (open_tk.Kind()) {
      case TokenKind::LPAREN:
        type = GroupNode<charT>::GroupType::BASIC;
        break;

      case TokenKind::QUESTLPAREN:
        type = GroupNode<charT>::GroupType::ANY;
        break;

      case TokenKind::STARLPAREN:
        type = GroupNode<charT>::GroupType::STAR;
        break;

      case TokenKind::PLUSLPAREN:
        type = GroupNode<charT>::GroupType::PLUS;
        break;

      case TokenKind::NEGLPAREN:
        type = GroupNode<charT>::GroupType::NEG;
        break;

      case TokenKind::ATLPAREN:
        type = GroupNode<charT>::GroupType::AT;
        break;

      default:
        throw Error("Not valid group");
        break;
    }

    AstNodePtr<charT> group_glob = ParserUnion();

    // Bind a fresh reference: assigning through the first one would
    // overwrite the opening token stored in the vector.
    Token<charT> &close_tk = NextToken();
    if (close_tk != TokenKind::RPAREN) {
      throw Error("Expected ')' at and of group");
    }

    return AstNodePtr<charT>(
        new GroupNode<charT>(type, std::move(group_glob)));
  }

  AstNodePtr<charT> ParserConcat() {
    auto check_end = [&]() -> bool {
      Token<charT> &tk = GetToken();

      switch (tk.Kind()) {
        case TokenKind::EOS:
        case TokenKind::RPAREN:
        case TokenKind::UNION:
          return true;

        default:
          return false;
      }
    };

    std::vector<AstNodePtr<charT>> parts;

    while (!check_end()) {
      parts.push_back(ParserBasicGlob());
    }

    return AstNodePtr<charT>(new ConcatNode<charT>(std::move(parts)));
  }

  AstNodePtr<charT> ParserUnion() {
    std::vector<AstNodePtr<charT>> items;
    items.push_back(ParserConcat());

    while (GetToken() == TokenKind::UNION) {
      Advance();
      items.push_back(ParserConcat());
    }

    return AstNodePtr<charT>(new UnionNode<charT>(std::move(items)));
  }

  AstNodePtr<charT> ParserGlob() {
    AstNodePtr<charT> glob = ParserConcat();

    if (GetToken() != TokenKind::EOS) {
      throw Error("Expected the end of glob");
    }

    return AstNodePtr<charT>(new GlobNode<charT>(std::move(glob)));
  }

  inline const Token<charT> &GetToken() const { return tok_vec_.at(pos_); }

  inline Token<charT> &GetToken() { return tok_vec_.at(pos_); }

  inline const Token<charT> &PeekAhead() const {
    if (pos_ >= (tok_vec_.size() - 1)) return tok_vec_.back();

    return tok_vec_.at(pos_ + 1);
  }

  // Returns the current token and steps past it; at the last token (EOS)
  // it keeps returning that token.
  inline Token<charT> &NextToken() {
    if (pos_ >= (tok_vec_.size() - 1)) return tok_vec_.back();

    Token<charT> &tk = tok_vec_.at(pos_);
    pos_++;
    return tk;
  }

  inline bool Advance() {
    if (pos_ == tok_vec_.size() - 1) return false;

    ++pos_;
    return true;
  }

  inline size_t Size() const noexcept { return tok_vec_.size(); }

  std::vector<Token<charT>> tok_vec_;
  size_t pos_;
};

/// Translates a syntax tree into automaton states.
template <class charT>
class AstConsumer {
 public:
  AstConsumer() = default;

  /// Fills `automata` with the states for the tree rooted at `root_node`
  /// (a GlobNode), followed by the match and fail states.
  void GenAutomata(AstNode<charT> *root_node, Automata<charT> &automata) {
    AstNode<charT> *concat_node =
        static_cast<GlobNode<charT> *>(root_node)->GetConcat();
    ExecConcat(concat_node, automata);
    Finish(automata);
  }

 private:
  // Appends the match and fail states and links the last pattern state (if
  // there is one) to the match state.
  void Finish(Automata<charT> &automata) {
    size_t match_state = automata.template NewState<StateMatch<charT>>();
    // preview_state_ is -1 for an empty pattern: nothing to link then, the
    // match state itself becomes state 0.
    if (preview_state_ >= 0) {
      automata.GetState(preview_state_).AddNextState(match_state);
    }
    automata.SetMatchState(match_state);

    size_t fail_state = automata.template NewState<StateFail<charT>>();
    automata.SetFailState(fail_state);
  }

  void ExecConcat(AstNode<charT> *node, Automata<charT> &automata) {
    ConcatNode<charT> *concat_node = static_cast<ConcatNode<charT> *>(node);
    std::vector<AstNodePtr<charT>> &basic_globs = concat_node->GetBasicGlobs();

    for (auto &basic_glob : basic_globs) {
      ExecBasicGlob(basic_glob.get(), automata);
    }
  }

  void ExecBasicGlob(AstNode<charT> *node, Automata<charT> &automata) {
    switch (node->GetType()) {
      case AstNode<charT>::Type::CHAR:
        ExecChar(node, automata);
        break;

      case AstNode<charT>::Type::ANY:
        ExecAny(node, automata);
        break;

      case AstNode<charT>::Type::STAR:
        ExecStar(node, automata);
        break;

      case AstNode<charT>::Type::POS_SET:
        ExecPositiveSet(node, automata);
        break;

      case AstNode<charT>::Type::NEG_SET:
        ExecNegativeSet(node, automata);
        break;

      case AstNode<charT>::Type::GROUP:
        ExecGroup(node, automata);
        break;

      default:
        break;
    }
  }

  void ExecChar(AstNode<charT> *node, Automata<charT> &automata) {
    CharNode<charT> *char_node = static_cast<CharNode<charT> *>(node);
    charT c = char_node->GetValue();
    NewState<StateChar<charT>>(automata, c);
  }

  void ExecAny(AstNode<charT> *, Automata<charT> &automata) {
    NewState<StateAny<charT>>(automata);
  }

  void ExecStar(AstNode<charT> *, Automata<charT> &automata) {
    NewState<StateStar<charT>>(automata);
    // successor 0 of a star is the star itself
    automata.GetState(current_state_).AddNextState(current_state_);
  }

  void ExecPositiveSet(AstNode<charT> *node, Automata<charT> &automata) {
    PositiveSetNode<charT> *pos_set_node =
        static_cast<PositiveSetNode<charT> *>(node);

    auto items = ProcessSetItems(pos_set_node->GetSet());
    NewState<StateSet<charT>>(automata, std::move(items));
  }

  void ExecNegativeSet(AstNode<charT> *node, Automata<charT> &automata) {
    NegativeSetNode<charT> *neg_set_node =
        static_cast<NegativeSetNode<charT> *>(node);

    auto items = ProcessSetItems(neg_set_node->GetSet());
    NewState<StateSet<charT>>(automata, std::move(items), /*neg*/ true);
  }

  std::vector<std::unique_ptr<SetItem<charT>>> ProcessSetItems(
      AstNode<charT> *node) {
    SetItemsNode<charT> *set_node = static_cast<SetItemsNode<charT> *>(node);
    std::vector<std::unique_ptr<SetItem<charT>>> vec;
    auto &items = set_node->GetItems();
    for (auto &item : items) {
      vec.push_back(ProcessSetItem(item.get()));
    }

    return vec;
  }

  std::unique_ptr<SetItem<charT>> ProcessSetItem(AstNode<charT> *node) {
    if (node->GetType() == AstNode<charT>::Type::CHAR) {
      CharNode<charT> *char_node = static_cast<CharNode<charT> *>(node);
      charT c = char_node->GetValue();
      return std::unique_ptr<SetItem<charT>>(new SetItemChar<charT>(c));
    } else if (node->GetType() == AstNode<charT>::Type::RANGE) {
      RangeNode<charT> *range_node = static_cast<RangeNode<charT> *>(node);
      CharNode<charT> *start_node =
          static_cast<CharNode<charT> *>(range_node->GetStart());
      CharNode<charT> *end_node =
          static_cast<CharNode<charT> *>(range_node->GetEnd());

      charT start_char = start_node->GetValue();
      charT end_char = end_node->GetValue();
      return std::unique_ptr<SetItem<charT>>(
          new SetItemRange<charT>(start_char, end_char));
    } else {
      throw Error("Not valid set item");
    }
  }

  void ExecGroup(AstNode<charT> *node, Automata<charT> &automata) {
    GroupNode<charT> *group_node = static_cast<GroupNode<charT> *>(node);
    AstNode<charT> *union_node = group_node->GetGlob();
    std::vector<std::unique_ptr<Automata<charT>>> automatas =
        ExecUnion(union_node);

    typename StateGroup<charT>::Type state_group_type{};
    switch (group_node->GetGroupType()) {
      case GroupNode<charT>::GroupType::BASIC:
        state_group_type = StateGroup<charT>::Type::BASIC;
        break;

      case GroupNode<charT>::GroupType::ANY:
        state_group_type = StateGroup<charT>::Type::ANY;
        break;

      case GroupNode<charT>::GroupType::STAR:
        state_group_type = StateGroup<charT>::Type::STAR;
        break;

      case GroupNode<charT>::GroupType::PLUS:
        state_group_type = StateGroup<charT>::Type::PLUS;
        break;

      case GroupNode<charT>::GroupType::AT:
        state_group_type = StateGroup<charT>::Type::AT;
        break;

      case GroupNode<charT>::GroupType::NEG:
        state_group_type = StateGroup<charT>::Type::NEG;
        break;
    }

    NewState<StateGroup<charT>>(automata, state_group_type,
                                std::move(automatas));
    // successor 0 of a group is the group itself
    automata.GetState(current_state_).AddNextState(current_state_);
  }

  // Builds one sub-automaton per alternative of the union.
  std::vector<std::unique_ptr<Automata<charT>>> ExecUnion(
      AstNode<charT> *node) {
    UnionNode<charT> *union_node = static_cast<UnionNode<charT> *>(node);
    auto &items = union_node->GetItems();
    std::vector<std::unique_ptr<Automata<charT>>> vec_automatas;
    for (auto &item : items) {
      std::unique_ptr<Automata<charT>> automata_ptr(new Automata<charT>);
      AstConsumer ast_consumer;
      ast_consumer.ExecConcat(item.get(), *automata_ptr);
      ast_consumer.Finish(*automata_ptr);

      vec_automatas.push_back(std::move(automata_ptr));
    }

    return vec_automatas;
  }

  // Creates a state and links the previously created state to it.
  template <class T, typename... Args>
  void NewState(Automata<charT> &automata, Args &&...args) {
    current_state_ = automata.template NewState<T>(std::forward<Args>(args)...);
    if (preview_state_ >= 0) {
      automata.GetState(preview_state_).AddNextState(current_state_);
    }
    preview_state_ = current_state_;
  }

 private:
  int preview_state_ = -1;
  size_t current_state_ = 0;
};

/// Glob with extended (ksh-style) group syntax.
template <class charT>
class ExtendedGlob {
 public:
  /// Compiles `pattern`.
  /// @throws Error when the pattern is malformed.
  ExtendedGlob(const String<charT> &pattern) {
    Lexer<charT> l(pattern);
    std::vector<Token<charT>> tokens = l.Scanner();
    Parser<charT> p(std::move(tokens));
    AstNodePtr<charT> ast_ptr = p.GenAst();

    AstConsumer<charT> ast_consumer;
    ast_consumer.GenAutomata(ast_ptr.get(), automata_);
  }

  ExtendedGlob(const ExtendedGlob &) = delete;
  ExtendedGlob &operator=(ExtendedGlob &) = delete;

  ExtendedGlob(ExtendedGlob &&glob) : automata_{std::move(glob.automata_)} {}

  ExtendedGlob &operator=(ExtendedGlob &&glob) {
    automata_ = std::move(glob.automata_);
    return *this;
  }

  /// True when the whole of `str` matches the pattern.
  bool Exec(const String<charT> &str) {
    bool r;
    std::tie(r, std::ignore) = automata_.Exec(str);
    return r;
  }

  const Automata<charT> &GetAutomata() const { return automata_; }

 private:
  Automata<charT> automata_;
};

/// Glob supporting only `?`, `*` and literal characters.
template <class charT>
class SimpleGlob {
 public:
  SimpleGlob(const String<charT> &pattern) { Parser(pattern); }

  SimpleGlob(const SimpleGlob &) = delete;
  SimpleGlob &operator=(SimpleGlob &) = delete;

  SimpleGlob(SimpleGlob &&glob) : automata_{std::move(glob.automata_)} {}

  SimpleGlob &operator=(SimpleGlob &&glob) {
    automata_ = std::move(glob.automata_);
    return *this;
  }

  /// Appends the states for `pattern` to the automaton.
  void Parser(const String<charT> &pattern) {
    size_t pos = 0;
    int preview_state = -1;

    while (pos < pattern.length()) {
      size_t current_state = 0;
      charT c = pattern[pos];
      switch (c) {
        case '?': {
          current_state = automata_.template NewState<StateAny<charT>>();
          ++pos;
          break;
        }

        case '*': {
          current_state = automata_.template NewState<StateStar<charT>>();
          automata_.GetState(current_state).AddNextState(current_state);
          ++pos;
          break;
        }

        default: {
          current_state = automata_.template NewState<StateChar<charT>>(c);
          ++pos;
          break;
        }
      }

      if (preview_state >= 0) {
        automata_.GetState(preview_state).AddNextState(current_state);
      }
      preview_state = current_state;
    }

    size_t match_state = automata_.template NewState<StateMatch<charT>>();
    // an empty pattern has no previous state to link from
    if (preview_state >= 0) {
      automata_.GetState(preview_state).AddNextState(match_state);
    }
    automata_.SetMatchState(match_state);

    size_t fail_state = automata_.template NewState<StateFail<charT>>();
    automata_.SetFailState(fail_state);
  }

  /// True when the whole of `str` matches the pattern.
  bool Exec(const String<charT> &str) const {
    bool r;
    std::tie(r, std::ignore) = automata_.Exec(str);
    return r;
  }

  const Automata<charT> &GetAutomata() const { return automata_; }

 private:
  // mutable: Exec() is const in the public interface, but running the
  // automaton records captures inside its states.
  mutable Automata<charT> automata_;
};

template <class charT>
using extended_glob = ExtendedGlob<charT>;

template <class charT>
using no_extended_glob = SimpleGlob<charT>;

template <class charT>
class MatchResults;

/// Compiled pattern; matched through the glob_match() free functions.
template <class charT, class globT = extended_glob<charT>>
class BasicGlob {
 public:
  BasicGlob(const String<charT> &pattern) : glob_{pattern} {}

  BasicGlob(const BasicGlob &) = delete;
  BasicGlob &operator=(BasicGlob &) = delete;

  BasicGlob(BasicGlob &&glob) : glob_{std::move(glob.glob_)} {}

  BasicGlob &operator=(BasicGlob &&glob) {
    glob_ = std::move(glob.glob_);
    return *this;
  }

  const Automata<charT> &GetAutomata() const { return glob_.GetAutomata(); }

 private:
  bool Exec(const String<charT> &str) { return glob_.Exec(str); }

  template <class charU, class globU>
  friend bool glob_match(const String<charU> &str,
                         BasicGlob<charU, globU> &glob);

  template <class charU, class globU>
  friend bool glob_match(const charU *str, BasicGlob<charU, globU> &glob);

  template <class charU, class globU>
  friend bool glob_match(const String<charU> &str, MatchResults<charU> &res,
                         BasicGlob<charU, globU> &glob);

  template <class charU, class globU>
  friend bool glob_match(const charU *str, MatchResults<charU> &res,
                         BasicGlob<charU, globU> &glob);

  globT glob_;
};

/// Strings captured by the wildcard states of the last glob_match() call.
template <class charT>
class MatchResults {
 public:
  using const_iterator =
      typename std::vector<String<charT>>::const_iterator;

  MatchResults() = default;

  MatchResults(const MatchResults &m) = default;

  MatchResults(MatchResults &&m) = default;

  MatchResults &operator=(const MatchResults &m) = default;

  MatchResults &operator=(MatchResults &&m) = default;

  bool empty() const { return results_.empty(); }

  size_t size() const { return results_.size(); }

  const_iterator begin() const noexcept { return results_.begin(); }

  const_iterator end() const noexcept { return results_.end(); }

  const_iterator cbegin() const noexcept { return results_.cbegin(); }

  const_iterator cend() const noexcept { return results_.cend(); }

  // Returns a const reference: a const member cannot hand out a mutable
  // reference to an element of results_.
  const String<charT> &operator[](size_t n) const { return results_[n]; }

 private:
  void SetResults(std::vector<String<charT>> &&results) {
    results_ = std::move(results);
  }

  template <class charU, class globU>
  friend bool glob_match(const String<charU> &str,
                         BasicGlob<charU, globU> &glob);

  template <class charU, class globU>
  friend bool glob_match(const charU *str, BasicGlob<charU, globU> &glob);

  template <class charU, class globU>
  friend bool glob_match(const String<charU> &str, MatchResults<charU> &res,
                         BasicGlob<charU, globU> &glob);

  template <class charU, class globU>
  friend bool glob_match(const charU *str, MatchResults<charU> &res,
                         BasicGlob<charU, globU> &glob);

  std::vector<String<charT>> results_;
};

/// True when the whole of `str` matches `glob`.
template <class charT, class globT = extended_glob<charT>>
bool glob_match(const String<charT> &str, BasicGlob<charT, globT> &glob) {
  return glob.Exec(str);
}

/// True when the whole of the null-terminated `str` matches `glob`.
template <class charT, class globT = extended_glob<charT>>
bool glob_match(const charT *str, BasicGlob<charT, globT> &glob) {
  return glob.Exec(str);
}

/// Like glob_match(str, glob), and stores the captured strings in `res`.
template <class charT, class globT = extended_glob<charT>>
bool glob_match(const String<charT> &str, MatchResults<charT> &res,
                BasicGlob<charT, globT> &glob) {
  bool r = glob.Exec(str);
  res.SetResults(glob.GetAutomata().GetMatchedStrings());
  return r;
}

/// Like glob_match(str, glob), and stores the captured strings in `res`.
template <class charT, class globT = extended_glob<charT>>
bool glob_match(const charT *str, MatchResults<charT> &res,
                BasicGlob<charT, globT> &glob) {
  bool r = glob.Exec(str);
  res.SetResults(glob.GetAutomata().GetMatchedStrings());
  return r;
}

template <class charT, class globT = extended_glob<charT>>
using basic_glob = BasicGlob<charT, globT>;

using glob = basic_glob<char, extended_glob<char>>;

using wglob = basic_glob<wchar_t, extended_glob<wchar_t>>;

using cmatch = MatchResults<char>;

using wmatch = MatchResults<wchar_t>;

}  // namespace glob