add:添加去除非ASCII附近空白函数。

This commit is contained in:
taynpg 2025-01-14 10:30:07 +08:00
parent 6f2ab1c560
commit 6cada82609
4 changed files with 58 additions and 1 deletions

View File

@ -24,6 +24,7 @@ set(SRC_FILES
) )
include_directories(include) include_directories(include)
include_directories(3rd)
if(DEFINED USE_TEST) if(DEFINED USE_TEST)
message(STATUS "USE TEST") message(STATUS "USE TEST")
enable_testing() enable_testing()

View File

@ -245,6 +245,11 @@ public:
static std::string u8ToGBK(const std::string& str); static std::string u8ToGBK(const std::string& str);
static std::string GBKTou8(const std::string& str); static std::string GBKTou8(const std::string& str);
#endif #endif
/// @brief Remove whitespace characters inside a paragraph, e.g. [你好 啊,在 哪里 ?] => [你好啊,在哪里?]
///        Only whitespace adjacent to non-ASCII characters is affected.
/// @param str Text to process.
/// @return A copy of str with that whitespace removed.
static ofString rbs(const ofString& str);
}; };
typedef class CThreadSleep typedef class CThreadSleep

View File

@ -2,6 +2,7 @@
#include <chrono> #include <chrono>
#include <iomanip> #include <iomanip>
#include <sstream> #include <sstream>
#include <utf8.h>
#ifdef _WIN32 #ifdef _WIN32
#include <windows.h> #include <windows.h>
@ -150,6 +151,47 @@ std::string CCodec::GBKTou8(const std::string& str)
return utf8Str; return utf8Str;
} }
#endif #endif
/// @brief Remove whitespace that sits next to non-ASCII characters,
///        e.g. [你好 啊,在 哪里 ?] => [你好啊,在哪里?].
///
/// A maximal run of whitespace (space, tab, LF, CR) is treated as one unit: it
/// is dropped entirely when the character right before the run or right after
/// it is non-ASCII (> 0x7F). Runs bordered only by ASCII characters or by the
/// string boundaries are kept unchanged.
///
/// @param str Input text; converted from UTF-16 when UNICODE_OFSTR is defined,
///            otherwise used as UTF-8 directly.
/// @return A copy of str with the qualifying whitespace removed.
/// @note NOTE(review): the utf8:: conversion routines presumably throw on
///       malformed input — confirm against the library documentation.
ofString CCodec::rbs(const ofString& str)
{
    std::string utf8_str;
#ifdef UNICODE_OFSTR
    utf8::utf16to8(str.begin(), str.end(), std::back_inserter(utf8_str));
#else
    utf8_str = str;
#endif

    // Work on code points so multi-byte characters are examined as a whole.
    std::vector<char32_t> unicode_chars;
    utf8::utf8to32(utf8_str.begin(), utf8_str.end(), std::back_inserter(unicode_chars));

    const auto is_blank = [](char32_t c) {
        return c == U' ' || c == U'\t' || c == U'\n' || c == U'\r';
    };
    const auto is_non_ascii = [](char32_t c) { return c > 0x7F; };

    const size_t count = unicode_chars.size();
    std::vector<char32_t> processed_chars;
    processed_chars.reserve(count);

    size_t pos = 0;
    while (pos < count) {
        if (!is_blank(unicode_chars[pos])) {
            processed_chars.push_back(unicode_chars[pos]);
            ++pos;
            continue;
        }

        // Find the end of this whitespace run. Judging the run as a whole
        // (instead of each blank against its direct neighbours) keeps the
        // result consistent for runs of any length: inner blanks of a long run
        // only have other blanks as neighbours and would otherwise survive.
        size_t run_end = pos;
        while (run_end < count && is_blank(unicode_chars[run_end])) {
            ++run_end;
        }

        const bool non_ascii_before = pos > 0 && is_non_ascii(unicode_chars[pos - 1]);
        const bool non_ascii_after = run_end < count && is_non_ascii(unicode_chars[run_end]);
        if (!non_ascii_before && !non_ascii_after) {
            // Purely ASCII surroundings: keep the run exactly as it was.
            processed_chars.insert(processed_chars.end(), unicode_chars.begin() + pos,
                                   unicode_chars.begin() + run_end);
        }
        pos = run_end;
    }

    std::string result_utf8;
    utf8::utf32to8(processed_chars.begin(), processed_chars.end(), std::back_inserter(result_utf8));

    ofString result;
#ifdef UNICODE_OFSTR
    utf8::utf8to16(result_utf8.begin(), result_utf8.end(), std::back_inserter(result));
#else
    result = result_utf8;
#endif
    return result;
}
CThreadSleep::CThreadSleep() CThreadSleep::CThreadSleep()
{ {
is_stop_sleep_ = false; is_stop_sleep_ = false;

View File

@ -1,6 +1,7 @@
#include <iostream> #include <iostream>
#include "of_str.h" #include <of_str.h>
#include <of_path.h> #include <of_path.h>
#include <of_util.h>
#include <cassert> #include <cassert>
using namespace ofen; using namespace ofen;
@ -19,10 +20,18 @@ void testB()
assert(rp == ofT("cpNiz")); assert(rp == ofT("cpNiz"));
} }
// Checks that CCodec::rbs strips the blanks surrounding non-ASCII text,
// including the trailing blank after the final full stop.
void testC()
{
    const std::string input(u8"这是 一 个测试 用例。 ");
    const std::string wanted(u8"这是一个测试用例。");
    // The call stays inside assert so nothing is evaluated when NDEBUG is set.
    assert(CCodec::rbs(input) == wanted);
}
// Test driver: runs testA, testB and testC in order, then prints "Done".
int main() int main()
{ {
testA(); testA();
testB(); testB();
testC();
std::cout << "Done" << std::endl; std::cout << "Done" << std::endl;
return 0; return 0;
} }