add:添加去除非ASCII附近空白函数。
This commit is contained in:
parent
6f2ab1c560
commit
6cada82609
@ -24,6 +24,7 @@ set(SRC_FILES
|
||||
)
|
||||
|
||||
include_directories(include)
|
||||
include_directories(3rd)
|
||||
if(DEFINED USE_TEST)
|
||||
message(STATUS "USE TEST")
|
||||
enable_testing()
|
||||
|
@ -245,6 +245,11 @@ public:
|
||||
static std::string u8ToGBK(const std::string& str);
|
||||
static std::string GBKTou8(const std::string& str);
|
||||
#endif
|
||||
/// @brief Removes whitespace from a paragraph, e.g. [你好 啊,在 哪里 ?] => [你好啊,在哪里?]
|
||||
/// Only whitespace located next to a non-ASCII character is affected; whitespace between ASCII characters is kept.
|
||||
/// @param str Text to clean up.
|
||||
/// @return A copy of str with the whitespace next to non-ASCII characters removed.
|
||||
static ofString rbs(const ofString& str);
|
||||
};
|
||||
|
||||
typedef class CThreadSleep
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <utf8.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
@ -150,6 +151,47 @@ std::string CCodec::GBKTou8(const std::string& str)
|
||||
return utf8Str;
|
||||
}
|
||||
#endif
|
||||
|
||||
/// @brief Removes runs of whitespace that touch a non-ASCII character.
///
/// The input is decoded to Unicode code points. Every maximal run of
/// space / tab / LF / CR whose left or right neighbour is a code point above
/// 0x7F is dropped; the remaining code points are encoded back to ofString.
/// Whitespace surrounded only by ASCII characters (or by the ends of the
/// string) is preserved.
///
/// A run is judged as a whole so that "你   好" becomes "你好". Testing each
/// whitespace character against its immediate neighbours only would leave the
/// interior of a run of three or more characters in place.
///
/// @param str Text to clean up; UTF-16 when UNICODE_OFSTR is defined, otherwise
///            handled as UTF-8.
/// @return A copy of str without the whitespace runs described above.
/// @note NOTE(review): the utf8:: helpers look like utfcpp's checked API, which
///       throws on malformed input - confirm that callers expect this.
ofString CCodec::rbs(const ofString& str)
{
    // Bring the input to UTF-8 regardless of the ofString flavour.
    std::string utf8_str;
#ifdef UNICODE_OFSTR
    utf8::utf16to8(str.begin(), str.end(), std::back_inserter(utf8_str));
#else
    utf8_str = str;
#endif

    // Work on whole code points so multi-byte characters are never split.
    std::vector<char32_t> code_points;
    utf8::utf8to32(utf8_str.begin(), utf8_str.end(), std::back_inserter(code_points));

    const auto is_blank = [](char32_t c) {
        return c == U' ' || c == U'\t' || c == U'\n' || c == U'\r';
    };
    const auto is_non_ascii = [](char32_t c) { return c > 0x7F; };

    const size_t count = code_points.size();
    std::vector<char32_t> kept;
    kept.reserve(count);

    size_t pos = 0;
    while (pos < count) {
        if (!is_blank(code_points[pos])) {
            kept.push_back(code_points[pos]);
            ++pos;
            continue;
        }

        // Find the end of the maximal whitespace run starting at pos.
        size_t run_end = pos + 1;
        while (run_end < count && is_blank(code_points[run_end])) {
            ++run_end;
        }

        // The neighbours of a maximal run are non-whitespace by construction.
        const bool after_non_ascii = pos > 0 && is_non_ascii(code_points[pos - 1]);
        const bool before_non_ascii = run_end < count && is_non_ascii(code_points[run_end]);
        if (!after_non_ascii && !before_non_ascii) {
            // Pure-ASCII surroundings: keep the run exactly as written.
            for (size_t k = pos; k < run_end; ++k) {
                kept.push_back(code_points[k]);
            }
        }
        pos = run_end;
    }

    std::string result_utf8;
    utf8::utf32to8(kept.begin(), kept.end(), std::back_inserter(result_utf8));

    // Convert back to the caller's string flavour.
    ofString result;
#ifdef UNICODE_OFSTR
    utf8::utf8to16(result_utf8.begin(), result_utf8.end(), std::back_inserter(result));
#else
    result = result_utf8;
#endif
    return result;
}
|
||||
|
||||
CThreadSleep::CThreadSleep()
|
||||
{
|
||||
is_stop_sleep_ = false;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <iostream>
|
||||
#include "of_str.h"
|
||||
#include <of_str.h>
|
||||
#include <of_path.h>
|
||||
#include <of_util.h>
|
||||
#include <cassert>
|
||||
|
||||
using namespace ofen;
|
||||
@ -19,10 +20,18 @@ void testB()
|
||||
assert(rp == ofT("cpNiz"));
|
||||
}
|
||||
|
||||
void testC()
|
||||
{
|
||||
std::string source(u8"这是 一 个测试 用例。 ");
|
||||
std::string expect(u8"这是一个测试用例。");
|
||||
assert(CCodec::rbs(source) == expect);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
testA();
|
||||
testB();
|
||||
testC();
|
||||
std::cout << "Done" << std::endl;
|
||||
return 0;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user