Pretty print XML using MSXML6
Задача простая — красиво отформатировать XML.
MSXML6 я обычно использую следующим образом:
#import <msxml6.dll> rename_namespace("MSXML6")
Заголовочный файл с классом (XMLUtility.h):
#pragma once struct XMLUtility { static _bstr_t PrettyPrint(const MSXML6::IXMLDOMNodePtr &pSrc); };
cpp-файл с классом (XMLUtility.cpp):
#include "stdafx.h"
#include "XMLUtility.h"

namespace
{
    using namespace MSXML6;

    /// Converts a wide string to the `unsigned short *` form used by the
    /// #import-generated SAX signatures. The const_cast is needed because
    /// those signatures take non-const pointers.
    /// NOTE(review): relies on wchar_t being 16 bits wide, as the original
    /// reinterpret_casts did.
    inline unsigned short *AsSaxChars(const wchar_t *text)
    {
        return(reinterpret_cast<unsigned short *>(const_cast<wchar_t *>(text)));
    }

    /// ISAXContentHandler that forwards every event to a target handler,
    /// except that character runs longer than a configurable threshold are
    /// replaced by an XML comment stating how many characters were dropped.
    ///
    /// The object is reference counted and deletes itself when the count
    /// reaches zero, so it must be allocated with `new` and owned through
    /// AddRef/Release (e.g. a smart interface pointer).
    class SAXContentHandlerFilter : public ISAXContentHandler
    {
    public:
        /// @param pTarget  Handler that receives the forwarded events; must not be null.
        explicit SAXContentHandlerFilter(const MSXML6::ISAXContentHandlerPtr &pTarget)
            : _pTarget(pTarget)
            , _refCount(0) // was left uninitialised; the first AddRef must produce 1
            , _charactersSkipThreshold(INT_MAX)
        {
            ASSERT(pTarget);
        }

        virtual ~SAXContentHandlerFilter() {}

    public:
        // IUnknown implementation

        /// Supports IUnknown and ISAXContentHandler only.
        HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, LPVOID *ppvObj)
        {
            if (!ppvObj)
                return(E_POINTER);

            if (riid == IID_IUnknown || riid == __uuidof(ISAXContentHandler))
            {
                *ppvObj = static_cast<ISAXContentHandler *>(this);
            }
            else
            {
                *ppvObj = NULL;
                return(E_NOINTERFACE);
            }

            AddRef();
            return(S_OK);
        }

        ULONG STDMETHODCALLTYPE AddRef()
        {
            return(InterlockedIncrement(&_refCount));
        }

        ULONG STDMETHODCALLTYPE Release()
        {
            const ULONG count = InterlockedDecrement(&_refCount);
            if (count == 0)
                delete this;
            return(count);
        }

        // ISAXContentHandler implementation
    public:
        STDMETHODIMP raw_putDocumentLocator(struct ISAXLocator *pLocator)
        {
            return(_pTarget->raw_putDocumentLocator(pLocator));
        }

        STDMETHODIMP raw_startDocument()
        {
            return(_pTarget->raw_startDocument());
        }

        STDMETHODIMP raw_endDocument()
        {
            return(_pTarget->raw_endDocument());
        }

        STDMETHODIMP raw_startPrefixMapping(unsigned short *pwchPrefix, int cchPrefix, unsigned short *pwchUri, int cchUri)
        {
            return(_pTarget->raw_startPrefixMapping(pwchPrefix, cchPrefix, pwchUri, cchUri));
        }

        STDMETHODIMP raw_endPrefixMapping(unsigned short *pwchPrefix, int cchPrefix)
        {
            return(_pTarget->raw_endPrefixMapping(pwchPrefix, cchPrefix));
        }

        STDMETHODIMP raw_startElement(unsigned short *pwchNamespaceUri, int cchNamespaceUri, unsigned short *pwchLocalName, int cchLocalName, unsigned short *pwchQName, int cchQName, struct ISAXAttributes *pAttributes)
        {
            return(_pTarget->raw_startElement(pwchNamespaceUri, cchNamespaceUri, pwchLocalName, cchLocalName, pwchQName, cchQName, pAttributes));
        }

        STDMETHODIMP raw_endElement(unsigned short *pwchNamespaceUri, int cchNamespaceUri, unsigned short *pwchLocalName, int cchLocalName, unsigned short *pwchQName, int cchQName)
        {
            return(_pTarget->raw_endElement(pwchNamespaceUri, cchNamespaceUri, pwchLocalName, cchLocalName, pwchQName, cchQName));
        }

        /// Forwards the text unchanged unless it is longer than the skip
        /// threshold and a lexical handler has been set; in that case a
        /// comment is emitted through the lexical handler instead.
        STDMETHODIMP raw_characters(unsigned short *pwchChars, int cchChars)
        {
            const bool skip = _charactersSkipThreshold < cchChars && _pSAXLexicalHandler;
            if (!skip)
            {
                return(_pTarget->raw_characters(pwchChars, cchChars));
            }

            CStringW skipped;
            skipped.Format(L" %d characters have been skipped ", cchChars);
            const int length = skipped.GetLength();

            // Use the raw vtable method: the error-handling wrapper `comment`
            // throws _com_error on failure, and an exception must not escape
            // from a COM callback.
            const HRESULT hr = _pSAXLexicalHandler->raw_comment(
                reinterpret_cast<unsigned short *>(skipped.LockBuffer()), length);
            skipped.UnlockBuffer();
            return(hr);
        }

        STDMETHODIMP raw_ignorableWhitespace(unsigned short *pwchChars, int cchChars)
        {
            return(_pTarget->raw_ignorableWhitespace(pwchChars, cchChars));
        }

        STDMETHODIMP raw_processingInstruction(unsigned short *pwchTarget, int cchTarget, unsigned short *pwchData, int cchData)
        {
            return(_pTarget->raw_processingInstruction(pwchTarget, cchTarget, pwchData, cchData));
        }

        STDMETHODIMP raw_skippedEntity(unsigned short *pwchName, int cchName)
        {
            return(_pTarget->raw_skippedEntity(pwchName, cchName));
        }

    public:
        /// Sets the handler used to emit the replacement comments.
        void SetLexicalHandler(const MSXML6::ISAXLexicalHandlerPtr &pSAXLexicalHandler)
        {
            ASSERT(pSAXLexicalHandler);
            _pSAXLexicalHandler = pSAXLexicalHandler;
        }

        /// Sets the maximum length of a character run that is still forwarded.
        /// A lexical handler is expected to have been set first.
        void SetCharactersSkipThreshold(int charactersSkipThreshold)
        {
            ASSERT(_pSAXLexicalHandler);
            _charactersSkipThreshold = charactersSkipThreshold;
        }

    private:
        const MSXML6::ISAXContentHandlerPtr _pTarget;
        LONG _refCount;

        // Filter options
        ISAXLexicalHandlerPtr _pSAXLexicalHandler;
        int _charactersSkipThreshold;
    };
}

/// Serialises pSrc as indented XML without an XML declaration.
///
/// The node is replayed through a SAX reader into an MXXMLWriter60; a
/// SAXContentHandlerFilter placed in between replaces text runs longer than
/// 200 characters with a comment.
///
/// @param pSrc  Node to serialise.
/// @return      "<NULL/>" when pSrc is null; the unformatted `pSrc->xml` when
///              any step of setting up or running the SAX pipeline fails;
///              otherwise the writer's output.
///
/// The pipeline is driven through the raw (HRESULT-returning) methods so that
/// a failure reaches the fallback branches; the #import error-handling
/// wrappers throw _com_error instead of returning a failed HRESULT. The final
/// reads of `output` and `xml` still go through the wrappers.
_bstr_t XMLUtility::PrettyPrint(const MSXML6::IXMLDOMNodePtr &pSrc)
{
    ASSERT(pSrc);
    if (!pSrc)
    {
        return(L"<NULL/>");
    }

    using namespace MSXML6;

    HRESULT hr = S_OK;

    IMXWriterPtr pMXWriter;
    if (FAILED(hr = pMXWriter.CreateInstance(__uuidof(MXXMLWriter60))))
    {
        return(pSrc->xml);
    }

    // VARIANT_BOOL properties expect VARIANT_TRUE (-1), not the C++ `true` (1).
    if (FAILED(hr = pMXWriter->put_indent(VARIANT_TRUE)) ||
        FAILED(hr = pMXWriter->put_omitXMLDeclaration(VARIANT_TRUE)))
    {
        return(pSrc->xml);
    }

    // Query the writer for every handler interface the reader will be given.
    const ISAXContentHandlerPtr pSAXContentHandler = pMXWriter;
    const ISAXErrorHandlerPtr pSAXErrorHandler = pMXWriter;
    const ISAXDTDHandlerPtr pSAXDTDHandler = pMXWriter;
    const ISAXLexicalHandlerPtr pSAXLexicalHandler = pMXWriter;
    const ISAXDeclHandlerPtr pSAXDeclHandler = pMXWriter;

    if (!pSAXContentHandler || !pSAXErrorHandler || !pSAXDTDHandler || !pSAXLexicalHandler || !pSAXDeclHandler)
    {
        return(pSrc->xml);
    }

    ISAXXMLReaderPtr pSAXReader;
    if (FAILED(hr = pSAXReader.CreateInstance(__uuidof(SAXXMLReader60))))
    {
        return(pSrc->xml);
    }

    // The smart pointer takes the first reference; the filter deletes itself
    // when the last reference is released.
    SAXContentHandlerFilter * const pFilter = new SAXContentHandlerFilter(pSAXContentHandler);
    const ISAXContentHandlerPtr pContentHandlerProxy(pFilter);

    pFilter->SetLexicalHandler(pSAXLexicalHandler);
    pFilter->SetCharactersSkipThreshold(200);

    if (FAILED(hr = pSAXReader->raw_putContentHandler(pContentHandlerProxy)) ||
        FAILED(hr = pSAXReader->raw_putDTDHandler(pSAXDTDHandler)) ||
        FAILED(hr = pSAXReader->raw_putErrorHandler(pSAXErrorHandler)) ||
        FAILED(hr = pSAXReader->raw_putProperty(AsSaxChars(L"http://xml.org/sax/properties/lexical-handler"), _variant_t(pSAXLexicalHandler.GetInterfacePtr()))) ||
        FAILED(hr = pSAXReader->raw_putProperty(AsSaxChars(L"http://xml.org/sax/properties/declaration-handler"), _variant_t(pSAXDeclHandler.GetInterfacePtr()))))
    {
        return(pSrc->xml);
    }

    if (FAILED(hr = pSAXReader->raw_parse(_variant_t(pSrc.GetInterfacePtr()))))
    {
        return(pSrc->xml);
    }

    return(pMXWriter->output);
}
Кроме форматирования, я делаю ещё некоторую фильтрацию — выбрасываю слишком большие текстовые фрагменты, заменяя их комментариями.
0 коммент.:
Отправить комментарий