186 lines
5.3 KiB
C++
186 lines
5.3 KiB
C++
#include "jsondata.h"
|
|
#include <iostream>
|
|
|
|
/// Creates a JSON helper and stores copies of the three configuration strings.
///
/// @param user_name       Stored in `user_`; emitted as the "role" of every
///                        message built by the request formatters in this file.
/// @param model           Stored in `model_`; emitted as the "model" field.
/// @param assistant_name  Stored in `assistant_`; not read by any code visible
///                        in this file.
CJsonOper::CJsonOper(const std::string& user_name,
                     const std::string& model,
                     const std::string& assistant_name)
    : user_(user_name),
      model_(model),
      assistant_(assistant_name)
{
    // Nothing else to set up: all state lives in the initializer list.
}
|
|
|
|
/// Destructor. No explicit cleanup is performed here; members are destroyed
/// by their own destructors.
CJsonOper::~CJsonOper() = default;
|
|
|
|
std::string CJsonOper::format_request(const std::string& content)
|
|
{
|
|
std::string model = model_;
|
|
std::string role = user_;
|
|
nlohmann::json json_data = {{"model", model}, {"messages", {{{"role", role}, {"content", content}}}}};
|
|
return json_data.dump();
|
|
}
|
|
|
|
/// Splits `input` on every occurrence of `delimiter`.
///
/// Empty pieces are kept, so "a,,b" split on "," yields {"a", "", "b"} and the
/// result always contains at least one element (the whole input when the
/// delimiter never occurs).
///
/// @param input      Text to split.
/// @param delimiter  Separator string. An empty delimiter cannot split
///                   anything; the input is returned as a single piece.
/// @return The pieces in their original order.
std::vector<std::string> CJsonOper::split(const std::string& input, const std::string& delimiter)
{
    std::vector<std::string> result;

    // find("") matches at the search position itself, so the cursor would never
    // advance and the loop below would append empty strings forever.
    if (delimiter.empty()) {
        result.push_back(input);
        return result;
    }

    size_t prev = 0;
    for (size_t pos = input.find(delimiter, prev); pos != std::string::npos;
         pos = input.find(delimiter, prev)) {
        result.push_back(input.substr(prev, pos - prev));
        prev = pos + delimiter.size();
    }

    // Whatever follows the last delimiter (possibly an empty string).
    result.push_back(input.substr(prev));
    return result;
}
|
|
|
|
std::string CJsonOper::multi_format_reuqest(const std::string& content, size_t per_sec_size)
|
|
{
|
|
std::string model = model_;
|
|
std::string role = user_;
|
|
nlohmann::json json_data;
|
|
json_data["model"] = model;
|
|
|
|
std::vector<nlohmann::json> messages;
|
|
size_t s = 0;
|
|
while (s < content.size()) {
|
|
size_t i = 0;
|
|
size_t t = 0;
|
|
while (i < per_sec_size && s + i < content.size()) {
|
|
t = get_u8_len(content[s + i]);
|
|
if (t == 0) {
|
|
std::cerr << "invalid codec!!!" << std::endl;
|
|
exit(1);
|
|
}
|
|
i += t;
|
|
}
|
|
std::string part = content.substr(s, i);
|
|
messages.push_back({{"role", role}, {"content", "\n附加数据:\n" + part}});
|
|
s += i;
|
|
}
|
|
|
|
json_data["messages"] = messages;
|
|
return json_data.dump();
|
|
}
|
|
|
|
/// Extracts the interesting fields of a chat-completion response.
///
/// Fields read (each one only when present AND of the expected JSON type):
///   - choices[0].message.content            -> Message::message_content
///   - choices[0].message.reasoning_content  -> Message::reasoning_content
///   - usage.prompt_tokens / completion_tokens / total_tokens
///   - id                                    -> Message::id
///
/// A field that is missing, `null`, or of another type is skipped and the
/// corresponding member keeps the value it has after `Message` is
/// default-constructed. Responses may carry e.g. `"content": null`, which must
/// not abort the whole parse.
///
/// @param data  Response body as JSON text.
/// @return The populated `Message`.
/// @throws json::parse_error when `data` is not valid JSON (unchanged: syntax
///         errors are still reported to the caller).
Message CJsonOper::parse(const std::string& data)
{
    Message re;
    const json j = json::parse(data);

    // Looks up `key` in `obj`; nullptr when `obj` is not an object or has no
    // such key. Never inserts and never throws on a type mismatch.
    const auto member = [](const json& obj, const char* key) -> const json* {
        if (!obj.is_object()) {
            return nullptr;
        }
        const auto it = obj.find(key);
        return it == obj.end() ? nullptr : &*it;
    };

    const json* choices = member(j, "choices");
    if (choices != nullptr && choices->is_array() && !choices->empty()) {
        // Only the first choice is consulted.
        if (const json* message = member(choices->front(), "message")) {
            if (const json* v = member(*message, "content"); v != nullptr && v->is_string()) {
                re.message_content = v->get<std::string>();
            }
            if (const json* v = member(*message, "reasoning_content"); v != nullptr && v->is_string()) {
                re.reasoning_content = v->get<std::string>();
            }
        }
    }

    if (const json* usage = member(j, "usage")) {
        if (const json* v = member(*usage, "prompt_tokens"); v != nullptr && v->is_number()) {
            re.prompt_tokens = v->get<int>();
        }
        if (const json* v = member(*usage, "completion_tokens"); v != nullptr && v->is_number()) {
            re.completion_tokens = v->get<int>();
        }
        if (const json* v = member(*usage, "total_tokens"); v != nullptr && v->is_number()) {
            re.total_tokens = v->get<int>();
        }
    }

    if (const json* v = member(j, "id"); v != nullptr && v->is_string()) {
        re.id = v->get<std::string>();
    }

    return re;
}
|
|
|
|
/// Writes `data` to the file "<id>.md", replacing any existing file.
///
/// @param data  Text to store.
/// @param id    File name without the ".md" suffix (may include a directory).
/// @return true when the file was opened, written and closed without a stream
///         error; false otherwise. A diagnostic is printed to std::cout on
///         failure.
bool CJsonOper::save_md(const std::string& data, const std::string& id)
{
    std::ofstream of(id + ".md");
    if (!of.is_open()) {
        std::cout << "can't open " << id << std::endl;
        return false;
    }

    of << data;
    // close() flushes the buffer; a full disk or I/O error surfaces here at the
    // latest, so the stream state must be inspected afterwards instead of
    // reporting success unconditionally.
    of.close();
    if (of.fail()) {
        std::cout << "can't write " << id << std::endl;
        return false;
    }
    return true;
}
|
|
|
|
/// Reads the entire file at `path` into `out`.
///
/// @param path  File to read (opened with the default text mode).
/// @param out   Receives the file content on success; left untouched when the
///              file cannot be opened.
/// @return true on success, false when the file could not be opened (a message
///         is printed to std::cout in that case).
bool CJsonOper::read_txt(const std::string& path, std::string& out)
{
    std::ifstream input(path);
    if (input.is_open()) {
        // Copy everything from the current position up to end-of-stream.
        out.assign(std::istreambuf_iterator<char>(input), std::istreambuf_iterator<char>());
        return true;
    }

    std::cout << "open failed: " << path << std::endl;
    return false;
}
|
|
|
|
/// Returns the length in bytes of the UTF-8 sequence announced by lead byte `ch`.
///
/// The length is derived from the run of leading 1-bits:
///   0xxxxxxx -> 1, 110xxxxx -> 2, 1110xxxx -> 3, 11110xxx -> 4,
///   111110xx -> 5, 1111110x -> 6.
///
/// @param ch  First byte of a sequence.
/// @return The sequence length (1..6).
///
/// @note Any other byte (a continuation byte 10xxxxxx, 0xFE or 0xFF) prints a
///       diagnostic to std::cerr and terminates the process with exit(1).
size_t CJsonOper::get_u8_len(unsigned char ch)
{
    if (ch < 0x80) {
        return 1;
    }
    // Compare the high bits against each lead-byte prefix, shortest first.
    if ((ch >> 5) == 0x06) {  // 110
        return 2;
    }
    if ((ch >> 4) == 0x0E) {  // 1110
        return 3;
    }
    if ((ch >> 3) == 0x1E) {  // 11110
        return 4;
    }
    if ((ch >> 2) == 0x3E) {  // 111110
        return 5;
    }
    if ((ch >> 1) == 0x7E) {  // 1111110
        return 6;
    }

    std::cerr << "invalid u8 first ch." << std::endl;
    exit(1);

    // Not reached; present so every path has a return statement.
    return 0;
}
|
|
|
|
/// Returns `input` without leading and trailing whitespace.
///
/// Whitespace means space, tab, line feed, carriage return, form feed and
/// vertical tab. Interior whitespace is preserved.
///
/// @param input  Text to trim.
/// @return The trimmed copy; an empty string when `input` is empty or consists
///         of whitespace only.
std::string CJsonOper::trim(const std::string& input)
{
    const char* const blanks = " \t\n\r\f\v";

    const size_t first = input.find_first_not_of(blanks);
    if (first != std::string::npos) {
        // A non-blank character exists, so the reverse search succeeds as well.
        const size_t last = input.find_last_not_of(blanks);
        const size_t count = last - first + 1;
        return input.substr(first, count);
    }
    return "";
}
|
|
|
|
std::string CJsonOper::get_all_dir_content(const std::string& dir, const std::string& types)
|
|
{
|
|
auto vec = split(types, ",");
|
|
std::vector<std::string> t;
|
|
for (const auto& item : vec) {
|
|
auto c = trim(item);
|
|
if (c.empty()) {
|
|
continue;
|
|
}
|
|
t.push_back("." + item);
|
|
std::cout << "use type:" << item << std::endl;
|
|
}
|
|
std::vector<std::string> task;
|
|
for (const auto& entry : fs::directory_iterator(dir)) {
|
|
if (!fs::is_regular_file(entry)) {
|
|
continue;
|
|
}
|
|
auto exten = entry.path().filename().extension().string();
|
|
if (std::find(t.begin(), t.end(), exten) != t.end()) {
|
|
std::cout << "Parse:" << entry.path().string() << std::endl;
|
|
task.push_back(entry.path().string());
|
|
}
|
|
}
|
|
// 提取内容
|
|
std::string content;
|
|
for (const auto& item : task) {
|
|
std::string one;
|
|
if (read_txt(item, one)) {
|
|
content.append("\n\n" + one);
|
|
} else {
|
|
std::cerr << "Can't read file: " << item << std::endl;
|
|
exit(1);
|
|
}
|
|
}
|
|
return content;
|
|
}
|