Mercurial > minori
comparison src/core/strings.cc @ 264:9a04802848c0
*: improve multiple things
e.g. making some strings.cc functions modify strings in-place,
improving m4_ax_have_qt.m4 code, making anime_db.cc rely on
std::optional rather than std::shared_ptr (which was stupid
anyway)
| author | Paper <paper@paper.us.eu.org> |
|---|---|
| date | Thu, 11 Apr 2024 10:15:57 -0400 |
| parents | dd211ff68b36 |
| children | f31305b9f60a |
comparison
equal
deleted
inserted
replaced
| 263:96416310ea14 | 264:9a04802848c0 |
|---|---|
| 16 #include <locale> | 16 #include <locale> |
| 17 #include <string> | 17 #include <string> |
| 18 #include <unordered_map> | 18 #include <unordered_map> |
| 19 #include <vector> | 19 #include <vector> |
| 20 | 20 |
| | 21 #include "utf8proc.h" |
| | 22 |
| 21 namespace Strings { | 23 namespace Strings { |
| 22 | 24 |
| 23 /* ew */ | 25 /* ew */ |
| 24 std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) { | 26 std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) { |
| 25 if (vector.size() < 1) | 27 if (vector.size() < 1) |
| 94 {"13", "XIII"} | 96 {"13", "XIII"} |
| 95 }; | 97 }; |
| 96 | 98 |
| 97 for (const auto& item : vec) | 99 for (const auto& item : vec) |
| 98 ReplaceAll(string, item.second, item.first); | 100 ReplaceAll(string, item.second, item.first); |
| | 101 } |
| | 102 |
| | 103 /* this also performs case folding, so our string is lowercase after this */ |
| | 104 void NormalizeUnicode(std::string& string) { |
| | 105 static constexpr utf8proc_option_t options = static_cast<utf8proc_option_t>( |
| | 106 UTF8PROC_COMPAT \| UTF8PROC_COMPOSE \| UTF8PROC_STABLE \| |
| | 107 UTF8PROC_IGNORE \| UTF8PROC_STRIPCC \| UTF8PROC_STRIPMARK \| |
| | 108 UTF8PROC_LUMP \| UTF8PROC_CASEFOLD \| UTF8PROC_NLF2LS |
| | 109 ); |
| | 110 |
| | 111 /* ack */ |
| | 112 utf8proc_uint8_t* buf = nullptr; |
| | 113 |
| | 114 const utf8proc_ssize_t size = utf8proc_map( |
| | 115 reinterpret_cast<const utf8proc_uint8_t*>(string.data()), |
| | 116 string.size(), |
| | 117 &buf, |
| | 118 options |
| | 119 ); |
| | 120 |
| | 121 if (size) |
| | 122 string = std::string(reinterpret_cast<const char*>(buf), size); |
| | 123 |
| | 124 if (buf) |
| | 125 free(buf); |
| | 126 } |
| | 127 |
| | 128 void NormalizeAnimeTitle(std::string& string) { |
| | 129 ConvertRomanNumerals(string); |
| | 130 NormalizeUnicode(string); |
| | 131 RemoveLeadingChars(string, ' '); |
| | 132 RemoveTrailingChars(string, ' '); |
| 99 } | 133 } |
| 100 | 134 |
| 101 /* removes dumb HTML tags because anilist is aids and | 135 /* removes dumb HTML tags because anilist is aids and |
| 102 * gives us HTML for synopses :/ | 136 * gives us HTML for synopses :/ |
| 103 */ | 137 */ |
| 228 } | 262 } |
| 229 | 263 |
| 230 return ToInt(str, 0); | 264 return ToInt(str, 0); |
| 231 } | 265 } |
| 232 | 266 |
| 233 std::string RemoveLeadingChars(std::string s, const char c) { | 267 void RemoveLeadingChars(std::string& s, const char c) { |
| 234 s.erase(0, std::min(s.find_first_not_of(c), s.size() - 1)); | 268 s.erase(0, std::min(s.find_first_not_of(c), s.size() - 1)); |
| 235 return s; | 269 } |
| 236 } | 270 |
| 237 | 271 void RemoveTrailingChars(std::string& s, const char c) { |
| 238 std::string RemoveTrailingChars(std::string s, const char c) { | |
| 239 s.erase(s.find_last_not_of(c) + 1, std::string::npos); | 272 s.erase(s.find_last_not_of(c) + 1, std::string::npos); |
| 240 return s; | |
| 241 } | 273 } |
| 242 | 274 |
| 243 bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) { | 275 bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) { |
| 244 for (unsigned long long i = 0; i < str.length() && i < sub.length(); i++) | 276 for (unsigned long long i = 0; i < str.length() && i < sub.length(); i++) |
| 245 if (str[i] != sub[i]) | 277 if (str[i] != sub[i]) |
