When parsing wchar_t content on win32 platform, rapidxml may throw parse_error exception. The content:
<xml att='最好' />
Here is my testing code:
/*
* @file : TestRapidXmlBug.cpp
* @author: shilyx
* @date : 2015-09-16 11:02:22.886
* @note : Generated by SlxTemplates
*/
#include <Windows.h>
#include "rapidxml.hpp"
#include <iostream>
#include <string>
using namespace std;
using namespace rapidxml;
int main(int argc, char *argv[])
{
// data block
unsigned char szData[] = {
0x3C, 0x00, 0x78, 0x00, 0x6D, 0x00, 0x6C, 0x00, 0x20, 0x00, 0x61, 0x00, 0x74, 0x00, 0x74, 0x00, 0x3D,
0x00, 0x27, 0x00, 0x00, 0x67, 0x7D, 0x59, 0x27, 0x00, 0x20, 0x00, 0x2F, 0x00, 0x3E, 0x00, 0x00, 0x00};
// uft8 string
char szDataUtf8[sizeof(szData) * 10] = "";
// ucs2 string
wchar_t *szDataUcs2 = (wchar_t *)szData;
WideCharToMultiByte(CP_UTF8, 0, szDataUcs2, -1, szDataUtf8, sizeof(szDataUtf8), NULL, NULL);
try
{
xml_document<wchar_t> xml;
cout<<"-------------------------wchar_t"<<endl;
xml.parse<0>(szDataUcs2); // will throw parse_error
cout<<"success"<<endl;
}
catch (parse_error &ex)
{
cout<<"exception: "<<ex.what()<<endl;
cout<<"failled"<<endl;
}
try
{
xml_document<char> xml;
cout<<"-------------------------char"<<endl;
xml.parse<0>(szDataUtf8); // will not throw any exception
cout<<"success"<<endl;
}
catch (parse_error &ex)
{
cout<<ex.what()<<endl;
cout<<"failled"<<endl;
}
return 0;
}
It will throw exception at:
// Make sure that end quote is present
if (*text != quote)
RAPIDXML_PARSE_ERROR("expected ' or \"", text);
++text; // Skip quote
The reason may be:
// Skip characters until predicate evaluates to true
template<class StopPred, int Flags>
static void skip(Ch *&text)
{
Ch *tmp = text;
while (StopPred::test(*tmp))
++tmp;
text = tmp;
}
The StopPred::test function:
// Detect attribute value character
template<Ch Quote>
struct attribute_value_pure_pred
{
static unsigned char test(Ch ch)
{
if (Quote == Ch('\''))
return internal::lookup_tables<0>::lookup_attribute_data_1_pure[static_cast<unsigned char>(ch)];
if (Quote == Ch('\"'))
return internal::lookup_tables<0>::lookup_attribute_data_2_pure[static_cast<unsigned char>(ch)];
return 0; // Should never be executed, to avoid warnings on Comeau
}
};
static_cast changes a wchar_t(0x6700) to unsigned char(0x00), the skip operation stopped.
Is this a bug? or a wrong to use rapidxml with wchar_t? rapidxml's last update date is 2013-04-26, I think it should be stable enough.