C++实现对URL的编码和解码(支持ansi和utf8格式)
#include <iostream> #include <string> #include <stdio.h> #include <assert.h> #include <fstream> using namespace std; unsigned char ToHex(unsigned char x) { return x > 9 ? x + 55 : x + 48; } ……
·
#include<iostream>
#include<string>
#include<stdio.h>
#include<assert.h>
#include<fstream>
using namespace std;
// Maps a nibble value to its uppercase hexadecimal digit character:
// 0-9 become '0'-'9' and 10-15 become 'A'-'F'.
unsigned char ToHex(unsigned char x)
{
    if (x <= 9)
    {
        return static_cast<unsigned char>('0' + x);
    }
    // 'A' - 10 == 55, i.e. the same offset as the decimal-digit branch uses
    // for '0' == 48.
    return static_cast<unsigned char>('A' + (x - 10));
}
// Converts one hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f') to
// its numeric value in the range 0-15.
//
// Passing any other character is a programming error: it trips an assert in
// debug builds. When asserts are compiled out (NDEBUG) the function returns 0
// so that the result is always a defined value.
unsigned char FromHex(unsigned char x)
{
    if (x >= '0' && x <= '9') return static_cast<unsigned char>(x - '0');
    // Only A-F / a-f are hexadecimal digits; letters beyond 'F' must not be
    // accepted, otherwise e.g. 'G' would silently decode to 16.
    if (x >= 'A' && x <= 'F') return static_cast<unsigned char>(x - 'A' + 10);
    if (x >= 'a' && x <= 'f') return static_cast<unsigned char>(x - 'a' + 10);

    assert(!"FromHex: character is not a hexadecimal digit");
    return 0;
}
// Percent-encodes `str` byte by byte in the form-urlencoded style.
//
// - ASCII letters, ASCII digits and '-', '_', '.', '~' are copied unchanged.
// - A space becomes '+'.
// - Every other byte becomes "%XX" with two uppercase hexadecimal digits.
//
// The function works on raw bytes and is therefore independent of the text
// encoding of `str`: the same text in different encodings produces different
// output because the underlying bytes differ.
//
// Returns the encoded string; an empty input yields an empty string.
std::string UrlEncode(const std::string& str)
{
    static const char kHexDigits[] = "0123456789ABCDEF";

    std::string encoded;
    encoded.reserve(str.size());  // Lower bound; grows if bytes get escaped.

    for (const char ch : str)
    {
        const unsigned char byte = static_cast<unsigned char>(ch);

        // Explicit ASCII ranges instead of isalnum(): isalnum() consults the
        // current C locale and may classify bytes >= 0x80 as alphanumeric,
        // which would let them through unencoded.
        const bool isUnreserved =
            (byte >= 'A' && byte <= 'Z') ||
            (byte >= 'a' && byte <= 'z') ||
            (byte >= '0' && byte <= '9') ||
            byte == '-' || byte == '_' || byte == '.' || byte == '~';

        if (isUnreserved)
        {
            encoded += ch;
        }
        else if (ch == ' ')
        {
            encoded += '+';
        }
        else
        {
            encoded += '%';
            encoded += kHexDigits[byte >> 4];
            encoded += kHexDigits[byte & 0x0F];
        }
    }
    return encoded;
}
// Returns the numeric value (0-15) of the hexadecimal digit `c`, or -1 when
// `c` is not a hexadecimal digit.
static int HexDigitValue(unsigned char c)
{
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    return -1;
}

// Decodes a form-urlencoded string: '+' becomes a space and every "%XX"
// escape (two hexadecimal digits, either case) becomes the byte it denotes.
// All other characters are copied unchanged.
//
// The input is treated as untrusted. A '%' that is not followed by two
// hexadecimal digits (including a '%' too close to the end of the string) is
// copied through literally rather than aborting or reading out of bounds.
//
// Returns the decoded bytes; their text encoding is whatever encoding the
// original data had before it was encoded.
std::string UrlDecode(const std::string& str)
{
    const size_t length = str.length();

    std::string decoded;
    decoded.reserve(length);  // Decoding never produces more bytes than it reads.

    for (size_t i = 0; i < length; ++i)
    {
        const char ch = str[i];

        if (ch == '+')
        {
            decoded += ' ';
            continue;
        }

        // Only treat '%' as an escape when both following characters exist
        // and are valid hexadecimal digits.
        if (ch == '%' && i + 2 < length)
        {
            const int high = HexDigitValue(static_cast<unsigned char>(str[i + 1]));
            const int low = HexDigitValue(static_cast<unsigned char>(str[i + 2]));
            if (high >= 0 && low >= 0)
            {
                decoded += static_cast<char>((high << 4) | low);
                i += 2;  // Skip the two digits that were just consumed.
                continue;
            }
        }

        decoded += ch;
    }
    return decoded;
}
int main()
{
//进行URL编码 纯英文 对于不带中文的字符转的码ansi和utf8是一样的
string str1 = "=;+/,";
string str1_url = UrlEncode(str1);
cout << str1_url << endl;
//对于带中文的ansi 编码和解码
string str_ansi = "还魂草";
string str_ansi_en = UrlEncode(str_ansi);
cout << str_ansi_en << endl;
//ansi的中文解码后也是ansi格式的中文
string str_ansi_de = UrlDecode(str_ansi_en);
cout << str_ansi_de << endl;
//对于带中文的utf8的编码和解码
string str_utf8 = u8"还魂草";
string str_utf8_en = UrlEncode(str_utf8);
//可以看到相同的中文 utf8和ansi是不一样的
cout << str_utf8_en << endl;
//utf8的中文解码后也是utf8的
string str_utf8_de = UrlDecode(str_utf8_en);
cout << str_utf8_de << endl;
ofstream wtxt("log.txt");
wtxt << str_utf8_de << endl;
wtxt.close();
getchar();
return 0;
}
以上程序执行的结果如下:
%3D%3B%2B%2F%2C
%BB%B9%BB%EA%B2%DD
还魂草
%E8%BF%98%E9%AD%82%E8%8D%89
杩橀瓊鑽
在C++控制台中,只有ansi格式的字符串直接输出才不会乱码。最后一行解码出的"还魂草"是utf8格式的,所以直接输出显示为乱码。不过可以打开log.txt查看:其中解码出的字符串显示正确,并且文件的编码格式为utf8。
更多推荐
已为社区贡献5条内容
所有评论(0)