// openai-api/jsondata.cxx

// 186 lines
// 5.3 KiB
// C++
// Raw Normal View History

// 2025-02-12 10:06:33 +08:00
#include "jsondata.h"
#include <iostream>
// 2025-02-12 13:36:28 +08:00
// Stores the three configuration strings used when building requests.
//
// user_name:      copied into user_; the request builders in this file emit it
//                 as the "role" of every message.
// model:          copied into model_; emitted as the "model" field.
// assistant_name: copied into assistant_; not read by any function visible in
//                 this file.
CJsonOper::CJsonOper(const std::string& user_name, const std::string& model, const std::string& assistant_name)
    : user_(user_name), model_(model), assistant_(assistant_name)
{
}
// Nothing to release explicitly: the destructor performs no work of its own,
// so the compiler-generated one is used.
CJsonOper::~CJsonOper() = default;
std::string CJsonOper::format_request(const std::string& content)
{
2025-02-12 13:36:28 +08:00
std::string model = model_;
std::string role = user_;
2025-02-12 10:06:33 +08:00
nlohmann::json json_data = {{"model", model}, {"messages", {{{"role", role}, {"content", content}}}}};
return json_data.dump();
}
// 2025-02-12 13:36:28 +08:00
// Splits `input` at every occurrence of `delimiter`.
//
// input:     text to split.
// delimiter: separator string; it is not included in the returned pieces.
//
// Returns the pieces in order. Empty pieces are kept (adjacent, leading or
// trailing delimiters yield ""), so the result always has at least one
// element. An empty delimiter yields the whole input as a single piece.
std::vector<std::string> CJsonOper::split(const std::string& input, const std::string& delimiter)
{
    std::vector<std::string> result;

    // find("") matches at the search position itself and the cursor would
    // advance by zero, so an empty delimiter used to loop forever while
    // appending empty strings. Nothing can be split in that case.
    if (delimiter.empty()) {
        result.push_back(input);
        return result;
    }

    size_t prev = 0;
    size_t pos = 0;
    while ((pos = input.find(delimiter, prev)) != std::string::npos) {
        result.push_back(input.substr(prev, pos - prev));
        prev = pos + delimiter.size();
    }
    // Remainder after the last delimiter (possibly empty).
    result.push_back(input.substr(prev));
    return result;
}
std::string CJsonOper::multi_format_reuqest(const std::string& content, size_t per_sec_size)
{
std::string model = model_;
std::string role = user_;
nlohmann::json json_data;
json_data["model"] = model;
std::vector<nlohmann::json> messages;
size_t s = 0;
while (s < content.size()) {
size_t i = 0;
size_t t = 0;
while (i < per_sec_size && s + i < content.size()) {
t = get_u8_len(content[s + i]);
if (t == 0) {
std::cerr << "invalid codec!!!" << std::endl;
exit(1);
}
i += t;
}
std::string part = content.substr(s, i);
messages.push_back({{"role", role}, {"content", "\n附加数据:\n" + part}});
s += i;
}
json_data["messages"] = messages;
return json_data.dump();
}
// 2025-02-12 10:06:33 +08:00
// Extracts the fields of interest from a chat-completion response.
//
// data: response body as JSON text.
//
// Returns a Message filled from:
//   choices[0].message.content            -> message_content
//   choices[0].message.reasoning_content  -> reasoning_content
//   usage.prompt_tokens / completion_tokens / total_tokens
//   id
// A field is only written when the key is present AND holds a value of the
// expected kind (string, respectively number). Missing keys, `null` values
// and mismatched types leave the corresponding member at its initial value
// instead of raising a type error from get<>(), which is what happened before
// for e.g. "content": null.
//
// json::parse() is still called with its default arguments, so malformed JSON
// text is reported by the library exactly as before.
Message CJsonOper::parse(const std::string& data)
{
    // Copy obj[key] into `out` if the key exists and holds a string.
    // find() yields end() when obj is not an object, so no extra check is needed.
    const auto read_string = [](const json& obj, const char* key, auto& out) {
        const auto it = obj.find(key);
        if (it != obj.end() && it->is_string()) {
            out = it->get<std::string>();
        }
    };
    // Same for numeric fields; any JSON number is accepted, as get<int>() did.
    const auto read_int = [](const json& obj, const char* key, auto& out) {
        const auto it = obj.find(key);
        if (it != obj.end() && it->is_number()) {
            out = it->get<int>();
        }
    };

    // Value-initialize so members without their own default do not stay
    // indeterminate when a field is absent.
    // NOTE(review): Message is declared outside this file - confirm `{}` is
    // accepted by its constructors.
    Message re{};
    const json j = json::parse(data);

    const auto choices = j.find("choices");
    if (choices != j.end() && choices->is_array() && !choices->empty()) {
        const json& first = (*choices)[0];
        const auto message = first.find("message");
        if (message != first.end()) {
            read_string(*message, "content", re.message_content);
            read_string(*message, "reasoning_content", re.reasoning_content);
        }
    }

    const auto usage = j.find("usage");
    if (usage != j.end()) {
        read_int(*usage, "prompt_tokens", re.prompt_tokens);
        read_int(*usage, "completion_tokens", re.completion_tokens);
        read_int(*usage, "total_tokens", re.total_tokens);
    }

    read_string(j, "id", re.id);
    return re;
}
// Writes `data` to the file "<id>.md", replacing any existing file.
//
// data: text to store.
// id:   file name without extension; ".md" is appended. The path is used as
//       given, so it is resolved relative to the current working directory
//       unless `id` already contains a directory part.
//
// Returns true only when the file was opened AND the data was written and the
// file closed without a stream error. Previously a failed write (for example
// a full disk) was still reported as success.
bool CJsonOper::save_md(const std::string& data, const std::string& id)
{
    std::ofstream of(id + ".md");
    if (!of.is_open()) {
        std::cout << "can't open " << id << std::endl;
        return false;
    }
    of << data;
    // close() flushes the buffer; a failed write or flush leaves an error flag
    // set on the stream, which fail() reports.
    of.close();
    return !of.fail();
}
// 2025-02-12 13:36:28 +08:00
// Reads the complete file at `path` into `out`.
//
// path: file to read; opened with the default std::ifstream mode.
// out:  receives the file contents on success; left untouched when the file
//       cannot be opened.
//
// Returns true if the file could be opened, false otherwise (after printing
// a message to standard output).
bool CJsonOper::read_txt(const std::string& path, std::string& out)
{
    std::ifstream file(path);
    if (file.is_open()) {
        // Slurp everything from the current position to end-of-stream.
        std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
        out.swap(content);
        return true;
    }
    std::cout << "open failed: " << path << std::endl;
    return false;
}
// Returns the length in bytes of the sequence introduced by lead byte `ch`.
//
//   0x00-0x7F -> 1      0xC0-0xDF -> 2      0xE0-0xEF -> 3
//   0xF0-0xF7 -> 4      0xF8-0xFB -> 5      0xFC-0xFD -> 6
//
// Any other value (0x80-0xBF, 0xFE, 0xFF) is not accepted as a lead byte: a
// message is written to standard error and the process ends with exit(1).
size_t CJsonOper::get_u8_len(unsigned char ch)
{
    if (ch < 0x80) {
        return 1;
    }
    if (ch >= 0xC0 && ch < 0xE0) {
        return 2;
    }
    if (ch >= 0xE0 && ch < 0xF0) {
        return 3;
    }
    if (ch >= 0xF0 && ch < 0xF8) {
        return 4;
    }
    if (ch >= 0xF8 && ch < 0xFC) {
        return 5;
    }
    if (ch >= 0xFC && ch < 0xFE) {
        return 6;
    }
    std::cerr << "invalid u8 first ch." << std::endl;
    exit(1);
    return 0;  // not reached; keeps every path returning a value
}
// Returns a copy of `input` without leading and trailing blanks.
//
// Blanks are space, tab, line feed, carriage return, form feed and vertical
// tab. Interior blanks are kept. An input that is empty or consists only of
// blanks yields an empty string.
std::string CJsonOper::trim(const std::string& input)
{
    const char* const blanks = " \t\n\r\f\v";

    const size_t first = input.find_first_not_of(blanks);
    if (first != std::string::npos) {
        // A non-blank exists, so the backwards search succeeds as well.
        const size_t last = input.find_last_not_of(blanks);
        return input.substr(first, last - first + 1);
    }
    return "";
}
// Concatenates the contents of all regular files directly inside `dir` whose
// extension is listed in `types`.
//
// dir:   directory to scan; sub-directories are not entered.
// types: comma separated extensions, e.g. "txt, md" or ".txt,.md". Blanks
//        around each entry are ignored, empty entries are skipped and a
//        leading dot is optional. Matching is case-sensitive.
//
// Returns every matching file's content, each preceded by "\n\n", in the
// order the directory iterator delivers the entries (the C++ standard leaves
// that order unspecified).
//
// The process is terminated with exit(1) if a matching file cannot be read.
// Errors raised by the directory iterator itself (for example a missing
// directory) are not handled here.
std::string CJsonOper::get_all_dir_content(const std::string& dir, const std::string& types)
{
    // Normalize the requested extensions to the ".ext" form reported by
    // path::extension().
    std::vector<std::string> t;
    for (const auto& item : split(types, ",")) {
        const auto c = trim(item);
        if (c.empty()) {
            continue;
        }
        // Use the trimmed token: the untrimmed one turned "txt, md" into
        // ". md", which can never match an extension.
        t.push_back(c.front() == '.' ? c : "." + c);
        std::cout << "use type:" << c << std::endl;
    }

    // Collect the files to read.
    std::vector<std::string> task;
    for (const auto& entry : fs::directory_iterator(dir)) {
        if (!fs::is_regular_file(entry)) {
            continue;
        }
        auto exten = entry.path().filename().extension().string();
        if (std::find(t.begin(), t.end(), exten) != t.end()) {
            std::cout << "Parse:" << entry.path().string() << std::endl;
            task.push_back(entry.path().string());
        }
    }

    // Extract the contents.
    std::string content;
    for (const auto& item : task) {
        std::string one;
        if (read_txt(item, one)) {
            content.append("\n\n" + one);
        } else {
            std::cerr << "Can't read file: " << item << std::endl;
            exit(1);
        }
    }
    return content;
}