annotate dep/anitomy/anitomy/parser_number.cpp @ 137:69db40272acd

dep/animia: [WIP] huge refactor this WILL NOT compile, because lots of code has been changed and every API in the original codebase has been removed. note that this api setup is not exactly permanent...
author Paper <mrpapersonic@gmail.com>
date Fri, 10 Nov 2023 13:52:47 -0500
parents 5c0397762b53
children a0aa8c8c4307
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
1 /*
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
2 ** Copyright (c) 2014-2017, Eren Okka
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
3 **
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
4 ** This Source Code Form is subject to the terms of the Mozilla Public
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
5 ** License, v. 2.0. If a copy of the MPL was not distributed with this
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
6 ** file, You can obtain one at https://mozilla.org/MPL/2.0/.
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
7 */
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
8
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
9 #include <algorithm>
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
10 #include <regex>
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
11
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
12 #include "element.h"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
13 #include "keyword.h"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
14 #include "parser.h"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
15 #include "string.h"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
16
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
17 namespace anitomy {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
18
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
19 bool Parser::IsValidEpisodeNumber(const string_t& number) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
20 return StringToInt(number) <= kEpisodeNumberMax;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
21 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
22
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
23 bool Parser::SetEpisodeNumber(const string_t& number, Token& token, bool validate) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
24 if (validate && !IsValidEpisodeNumber(number))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
25 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
26
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
27 token.category = kIdentifier;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
28
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
29 auto category = kElementEpisodeNumber;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
30
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
31 // Handle equivalent numbers
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
32 if (found_episode_keywords_) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
33 for (auto& element : elements_) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
34 if (element.first != kElementEpisodeNumber)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
35 continue;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
36 // The larger number gets to be the alternative one
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
37 const int comparison = StringToInt(number) - StringToInt(element.second);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
38 if (comparison > 0) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
39 category = kElementEpisodeNumberAlt;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
40 } else if (comparison < 0) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
41 element.first = kElementEpisodeNumberAlt;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
42 } else {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
43 return false; // No need to add the same number twice
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
44 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
45 break;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
46 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
47 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
48
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
49 elements_.insert(category, number);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
50 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
51 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
52
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
53 bool Parser::SetAlternativeEpisodeNumber(const string_t& number, Token& token) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
54 elements_.insert(kElementEpisodeNumberAlt, number);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
55 token.category = kIdentifier;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
56
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
57 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
58 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
59
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
60 ////////////////////////////////////////////////////////////////////////////////
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
61
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
62 bool Parser::IsValidVolumeNumber(const string_t& number) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
63 return StringToInt(number) <= kVolumeNumberMax;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
64 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
65
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
66 bool Parser::SetVolumeNumber(const string_t& number, Token& token, bool validate) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
67 if (validate)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
68 if (!IsValidVolumeNumber(number))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
69 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
70
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
71 elements_.insert(kElementVolumeNumber, number);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
72 token.category = kIdentifier;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
73 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
74 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
75
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
76 ////////////////////////////////////////////////////////////////////////////////
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
77
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
78 bool Parser::NumberComesAfterPrefix(ElementCategory category, Token& token) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
79 size_t number_begin = FindNumberInString(token.content);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
80 auto prefix = keyword_manager.Normalize(token.content.substr(0, number_begin));
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
81
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
82 if (keyword_manager.Find(category, prefix)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
83 auto number = token.content.substr(number_begin, token.content.length() - number_begin);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
84 switch (category) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
85 case kElementEpisodePrefix:
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
86 if (!MatchEpisodePatterns(number, token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
87 SetEpisodeNumber(number, token, false);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
88 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
89 case kElementVolumePrefix:
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
90 if (!MatchVolumePatterns(number, token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
91 SetVolumeNumber(number, token, false);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
92 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
93 default: break;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
94 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
95 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
96
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
97 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
98 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
99
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
100 bool Parser::NumberComesBeforeAnotherNumber(const token_iterator_t token) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
101 auto separator_token = FindNextToken(tokens_, token, kFlagNotDelimiter);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
102
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
103 if (separator_token != tokens_.end()) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
104 static const std::vector<std::pair<string_t, bool>> separators{
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
105 {L"&", true },
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
106 {L"of", false},
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
107 };
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
108 for (const auto& separator : separators) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
109 if (IsStringEqualTo(separator_token->content, separator.first)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
110 auto other_token = FindNextToken(tokens_, separator_token, kFlagNotDelimiter);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
111 if (other_token != tokens_.end() && IsNumericString(other_token->content)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
112 SetEpisodeNumber(token->content, *token, false);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
113 if (separator.second)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
114 SetEpisodeNumber(other_token->content, *other_token, false);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
115 separator_token->category = kIdentifier;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
116 other_token->category = kIdentifier;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
117 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
118 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
119 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
120 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
121 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
122
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
123 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
124 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
125
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
126 bool Parser::SearchForEpisodePatterns(std::vector<size_t>& tokens) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
127 for (const auto& token_index : tokens) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
128 auto token = tokens_.begin() + token_index;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
129 bool numeric_front = IsNumericChar(token->content.front());
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
130
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
131 if (!numeric_front) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
132 // e.g. "EP.1", "Vol.1"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
133 if (NumberComesAfterPrefix(kElementEpisodePrefix, *token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
134 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
135 if (NumberComesAfterPrefix(kElementVolumePrefix, *token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
136 continue;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
137 } else {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
138 // e.g. "8 & 10", "01 of 24"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
139 if (NumberComesBeforeAnotherNumber(token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
140 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
141 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
142 // Look for other patterns
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
143 if (MatchEpisodePatterns(token->content, *token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
144 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
145 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
146
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
147 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
148 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
149
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
150 ////////////////////////////////////////////////////////////////////////////////
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
151
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
152 using regex_t = std::basic_regex<char_t>;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
153 using regex_match_results_t = std::match_results<string_t::const_iterator>;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
154
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
155 bool Parser::MatchSingleEpisodePattern(const string_t& word, Token& token) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
156 static const regex_t pattern(L"(\\d{1,4})[vV](\\d)");
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
157 regex_match_results_t match_results;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
158
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
159 if (std::regex_match(word, match_results, pattern)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
160 SetEpisodeNumber(match_results[1].str(), token, false);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
161 elements_.insert(kElementReleaseVersion, match_results[2].str());
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
162 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
163 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
164
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
165 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
166 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
167
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
168 bool Parser::MatchMultiEpisodePattern(const string_t& word, Token& token) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
169 static const regex_t pattern(L"(\\d{1,4})(?:[vV](\\d))?[-~&+]"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
170 L"(\\d{1,4})(?:[vV](\\d))?");
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
171 regex_match_results_t match_results;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
172
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
173 if (std::regex_match(word, match_results, pattern)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
174 auto lower_bound = match_results[1].str();
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
175 auto upper_bound = match_results[3].str();
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
176 // Avoid matching expressions such as "009-1" or "5-2"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
177 if (StringToInt(lower_bound) < StringToInt(upper_bound)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
178 if (SetEpisodeNumber(lower_bound, token, true)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
179 SetEpisodeNumber(upper_bound, token, false);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
180 if (match_results[2].matched)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
181 elements_.insert(kElementReleaseVersion, match_results[2].str());
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
182 if (match_results[4].matched)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
183 elements_.insert(kElementReleaseVersion, match_results[4].str());
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
184 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
185 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
186 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
187 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
188
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
189 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
190 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
191
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
192 bool Parser::MatchSeasonAndEpisodePattern(const string_t& word, Token& token) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
193 static const regex_t pattern(L"S?"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
194 L"(\\d{1,2})(?:-S?(\\d{1,2}))?"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
195 L"(?:x|[ ._-x]?E)"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
196 L"(\\d{1,4})(?:-E?(\\d{1,4}))?"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
197 L"(?:[vV](\\d))?",
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
198 std::regex_constants::icase);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
199 regex_match_results_t match_results;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
200
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
201 if (std::regex_match(word, match_results, pattern)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
202 if (StringToInt(match_results[1]) == 0)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
203 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
204 elements_.insert(kElementAnimeSeason, match_results[1]);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
205 if (match_results[2].matched)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
206 elements_.insert(kElementAnimeSeason, match_results[2]);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
207 SetEpisodeNumber(match_results[3], token, false);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
208 if (match_results[4].matched)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
209 SetEpisodeNumber(match_results[4], token, false);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
210 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
211 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
212
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
213 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
214 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
215
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
216 bool Parser::MatchTypeAndEpisodePattern(const string_t& word, Token& token) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
217 size_t number_begin = FindNumberInString(word);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
218 auto prefix = word.substr(0, number_begin);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
219
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
220 ElementCategory category = kElementAnimeType;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
221 KeywordOptions options;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
222
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
223 if (keyword_manager.Find(keyword_manager.Normalize(prefix), category, options)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
224 elements_.insert(kElementAnimeType, prefix);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
225 auto number = word.substr(number_begin);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
226 if (MatchEpisodePatterns(number, token) || SetEpisodeNumber(number, token, true)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
227 auto it = std::find(tokens_.begin(), tokens_.end(), token);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
228 if (it != tokens_.end()) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
229 // Split token (we do this last in order to avoid invalidating our
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
230 // token reference earlier)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
231 token.content = number;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
232 tokens_.insert(it, Token(options.identifiable ? kIdentifier : kUnknown, prefix, token.enclosed));
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
233 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
234 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
235 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
236 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
237
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
238 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
239 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
240
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
241 bool Parser::MatchFractionalEpisodePattern(const string_t& word, Token& token) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
242 // We don't allow any fractional part other than ".5", because there are cases
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
243 // where such a number is a part of the anime title (e.g. "Evangelion: 1.11",
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
244 // "Tokyo Magnitude 8.0") or a keyword (e.g. "5.1").
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
245 static const regex_t pattern(L"\\d+\\.5");
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
246
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
247 if (std::regex_match(word, pattern))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
248 if (SetEpisodeNumber(word, token, true))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
249 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
250
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
251 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
252 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
253
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
254 bool Parser::MatchPartialEpisodePattern(const string_t& word, Token& token) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
255 auto it = std::find_if_not(word.begin(), word.end(), IsNumericChar);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
256 auto suffix_length = std::distance(it, word.end());
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
257
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
258 auto is_valid_suffix = [](const char_t c) { return (c >= L'A' && c <= L'C') || (c >= L'a' && c <= L'c'); };
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
259
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
260 if (suffix_length == 1 && is_valid_suffix(*it))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
261 if (SetEpisodeNumber(word, token, true))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
262 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
263
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
264 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
265 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
266
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
267 bool Parser::MatchNumberSignPattern(const string_t& word, Token& token) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
268 if (word.front() != L'#')
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
269 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
270
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
271 static const regex_t pattern(L"#(\\d{1,4})(?:[-~&+](\\d{1,4}))?(?:[vV](\\d))?");
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
272 regex_match_results_t match_results;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
273
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
274 if (std::regex_match(word, match_results, pattern)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
275 if (SetEpisodeNumber(match_results[1].str(), token, true)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
276 if (match_results[2].matched)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
277 SetEpisodeNumber(match_results[2].str(), token, false);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
278 if (match_results[3].matched)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
279 elements_.insert(kElementReleaseVersion, match_results[3].str());
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
280 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
281 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
282 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
283
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
284 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
285 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
286
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
287 bool Parser::MatchJapaneseCounterPattern(const string_t& word, Token& token) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
288 if (word.back() != L'\u8A71')
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
289 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
290
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
291 static const regex_t pattern(L"(\\d{1,4})\u8A71");
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
292 regex_match_results_t match_results;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
293
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
294 if (std::regex_match(word, match_results, pattern)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
295 SetEpisodeNumber(match_results[1].str(), token, false);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
296 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
297 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
298
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
299 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
300 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
301
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
302 bool Parser::MatchEpisodePatterns(string_t word, Token& token) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
303 // All patterns contain at least one non-numeric character
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
304 if (IsNumericString(word))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
305 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
306
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
307 TrimString(word, L" -");
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
308
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
309 const bool numeric_front = IsNumericChar(word.front());
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
310 const bool numeric_back = IsNumericChar(word.back());
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
311
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
312 // e.g. "01v2"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
313 if (numeric_front && numeric_back)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
314 if (MatchSingleEpisodePattern(word, token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
315 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
316 // e.g. "01-02", "03-05v2"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
317 if (numeric_front && numeric_back)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
318 if (MatchMultiEpisodePattern(word, token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
319 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
320 // e.g. "2x01", "S01E03", "S01-02xE001-150"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
321 if (numeric_back)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
322 if (MatchSeasonAndEpisodePattern(word, token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
323 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
324 // e.g. "ED1", "OP4a", "OVA2"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
325 if (!numeric_front)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
326 if (MatchTypeAndEpisodePattern(word, token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
327 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
328 // e.g. "07.5"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
329 if (numeric_front && numeric_back)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
330 if (MatchFractionalEpisodePattern(word, token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
331 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
332 // e.g. "4a", "111C"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
333 if (numeric_front && !numeric_back)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
334 if (MatchPartialEpisodePattern(word, token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
335 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
336 // e.g. "#01", "#02-03v2"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
337 if (numeric_back)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
338 if (MatchNumberSignPattern(word, token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
339 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
340 // U+8A71 is used as counter for stories, episodes of TV series, etc.
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
341 if (numeric_front)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
342 if (MatchJapaneseCounterPattern(word, token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
343 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
344
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
345 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
346 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
347
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
348 ////////////////////////////////////////////////////////////////////////////////
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
349
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
350 bool Parser::MatchSingleVolumePattern(const string_t& word, Token& token) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
351 static const regex_t pattern(L"(\\d{1,2})[vV](\\d)");
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
352 regex_match_results_t match_results;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
353
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
354 if (std::regex_match(word, match_results, pattern)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
355 SetVolumeNumber(match_results[1].str(), token, false);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
356 elements_.insert(kElementReleaseVersion, match_results[2].str());
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
357 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
358 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
359
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
360 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
361 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
362
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
363 bool Parser::MatchMultiVolumePattern(const string_t& word, Token& token) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
364 static const regex_t pattern(L"(\\d{1,2})[-~&+](\\d{1,2})(?:[vV](\\d))?");
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
365 regex_match_results_t match_results;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
366
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
367 if (std::regex_match(word, match_results, pattern)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
368 auto lower_bound = match_results[1].str();
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
369 auto upper_bound = match_results[2].str();
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
370 if (StringToInt(lower_bound) < StringToInt(upper_bound)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
371 if (SetVolumeNumber(lower_bound, token, true)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
372 SetVolumeNumber(upper_bound, token, false);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
373 if (match_results[3].matched)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
374 elements_.insert(kElementReleaseVersion, match_results[3].str());
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
375 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
376 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
377 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
378 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
379
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
380 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
381 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
382
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
383 bool Parser::MatchVolumePatterns(string_t word, Token& token) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
384 // All patterns contain at least one non-numeric character
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
385 if (IsNumericString(word))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
386 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
387
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
388 TrimString(word, L" -");
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
389
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
390 const bool numeric_front = IsNumericChar(word.front());
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
391 const bool numeric_back = IsNumericChar(word.back());
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
392
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
393 // e.g. "01v2"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
394 if (numeric_front && numeric_back)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
395 if (MatchSingleVolumePattern(word, token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
396 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
397 // e.g. "01-02", "03-05v2"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
398 if (numeric_front && numeric_back)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
399 if (MatchMultiVolumePattern(word, token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
400 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
401
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
402 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
403 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
404
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
405 ////////////////////////////////////////////////////////////////////////////////
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
406
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
407 bool Parser::SearchForEquivalentNumbers(std::vector<size_t>& tokens) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
408 for (auto token_index = tokens.begin(); token_index != tokens.end(); ++token_index) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
409 auto token = tokens_.begin() + *token_index;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
410
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
411 if (IsTokenIsolated(token) || !IsValidEpisodeNumber(token->content))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
412 continue;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
413
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
414 // Find the first enclosed, non-delimiter token
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
415 auto next_token = FindNextToken(tokens_, token, kFlagNotDelimiter);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
416 if (!CheckTokenCategory(next_token, kBracket))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
417 continue;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
418 next_token = FindNextToken(tokens_, next_token, kFlagEnclosed | kFlagNotDelimiter);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
419 if (!CheckTokenCategory(next_token, kUnknown))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
420 continue;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
421
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
422 // Check if it's an isolated number
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
423 if (!IsTokenIsolated(next_token) || !IsNumericString(next_token->content) ||
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
424 !IsValidEpisodeNumber(next_token->content))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
425 continue;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
426
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
427 auto minmax = std::minmax(token, next_token, [](const token_iterator_t& a, const token_iterator_t& b) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
428 return StringToInt(a->content) < StringToInt(b->content);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
429 });
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
430 SetEpisodeNumber(minmax.first->content, *minmax.first, false);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
431 SetAlternativeEpisodeNumber(minmax.second->content, *minmax.second);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
432
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
433 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
434 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
435
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
436 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
437 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
438
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
439 bool Parser::SearchForIsolatedNumbers(std::vector<size_t>& tokens) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
440 for (auto token_index = tokens.begin(); token_index != tokens.end(); ++token_index) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
441 auto token = tokens_.begin() + *token_index;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
442
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
443 if (!token->enclosed || !IsTokenIsolated(token))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
444 continue;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
445
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
446 if (SetEpisodeNumber(token->content, *token, true))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
447 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
448 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
449
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
450 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
451 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
452
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
453 bool Parser::SearchForSeparatedNumbers(std::vector<size_t>& tokens) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
454 for (auto token_index = tokens.begin(); token_index != tokens.end(); ++token_index) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
455 auto token = tokens_.begin() + *token_index;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
456 auto previous_token = FindPreviousToken(tokens_, token, kFlagNotDelimiter);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
457
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
458 // See if the number has a preceding "-" separator
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
459 if (CheckTokenCategory(previous_token, kUnknown) && IsDashCharacter(previous_token->content)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
460 if (SetEpisodeNumber(token->content, *token, true)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
461 previous_token->category = kIdentifier;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
462 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
463 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
464 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
465 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
466
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
467 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
468 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
469
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
470 bool Parser::SearchForLastNumber(std::vector<size_t>& tokens) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
471 for (auto it = tokens.rbegin(); it != tokens.rend(); ++it) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
472 size_t token_index = *it;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
473 auto token = tokens_.begin() + token_index;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
474
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
475 // Assuming that episode number always comes after the title, first token
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
476 // cannot be what we're looking for
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
477 if (token_index == 0)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
478 continue;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
479
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
480 // An enclosed token is unlikely to be the episode number at this point
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
481 if (token->enclosed)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
482 continue;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
483
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
484 // Ignore if it's the first non-enclosed, non-delimiter token
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
485 if (std::all_of(tokens_.begin(), token,
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
486 [](const Token& token) { return token.enclosed || token.category == kDelimiter; }))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
487 continue;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
488
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
489 // Ignore if the previous token is "Movie" or "Part"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
490 auto previous_token = FindPreviousToken(tokens_, token, kFlagNotDelimiter);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
491 if (CheckTokenCategory(previous_token, kUnknown)) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
492 if (IsStringEqualTo(previous_token->content, L"Movie") ||
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
493 IsStringEqualTo(previous_token->content, L"Part")) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
494 continue;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
495 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
496 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
497
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
498 // We'll use this number after all
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
499 if (SetEpisodeNumber(token->content, *token, true))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
500 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
501 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
502
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
503 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
504 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
505
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
506 } // namespace anitomy