Hungry Mind, Blog about everything in IT - C#, Java, C++, .NET, Windows, WinAPI, ...

Pretty print XML using MSXML6

Задача простая — красиво отформатировать XML.

MSXML6 я обычно использую следующим образом:

#import <msxml6.dll> rename_namespace("MSXML6")

Заголовочный файл с классом XMLUtility.h:

#pragma once

// Stateless holder for XML helper routines built on MSXML6.
struct XMLUtility
{

    // Returns the XML of pSrc serialized with indentation and without the
    // XML declaration. A null pSrc yields the literal "<NULL/>". The
    // implementation falls back to the node's plain `xml` text when the
    // formatting pipeline cannot be created or run. Long text runs are
    // replaced by a comment stating how many characters were skipped.
    static _bstr_t PrettyPrint(const MSXML6::IXMLDOMNodePtr &pSrc);

};

cpp файл с классом XMLUtility.cpp:

#include "stdafx.h"
#include "XMLUtility.h"

namespace {

    using namespace MSXML6;

    // SAX content-handler proxy placed between a SAX reader and a target
    // ISAXContentHandler. Every event is forwarded unchanged to the target,
    // except character runs longer than the configured threshold: those are
    // replaced by a comment (emitted through the lexical handler) that states
    // how many characters were dropped.
    //
    // The object is reference counted and deletes itself when the count drops
    // to zero, so it must be created with `new` and owned through AddRef/Release
    // (for example via a _com_ptr_t smart pointer).
    class SAXContentHandlerFilter : public ISAXContentHandler
    {

    public:
        // pTarget receives all forwarded events and must not be null.
        // The reference count starts at 0; the first AddRef takes ownership.
        // Filtering is effectively off until SetCharactersSkipThreshold is called
        // (the default threshold is INT_MAX).
        SAXContentHandlerFilter(const MSXML6::ISAXContentHandlerPtr &pTarget) : _pTarget(pTarget), _refCount(0), _charactersSkipThreshold(INT_MAX) {
            ASSERT(pTarget);
        }
        virtual ~SAXContentHandlerFilter() {}

    public:
        // IUnknown implementation

        // Supports IUnknown and ISAXContentHandler only.
        // Returns E_POINTER for a null out-pointer and E_NOINTERFACE (with
        // *ppvObj set to NULL) for any other interface id.
        HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, LPVOID *ppvObj) {
            if (!ppvObj)
                return(E_POINTER);

            if (riid == IID_IUnknown || riid == __uuidof(ISAXContentHandler)) {
                // Hand out the interface pointer explicitly rather than the raw object pointer.
                *ppvObj = static_cast<ISAXContentHandler *>(this);
            }
            else {
                *ppvObj = NULL;
                return(E_NOINTERFACE);
            }

            AddRef();
            return(S_OK);
        }

        ULONG STDMETHODCALLTYPE AddRef() {
            return(InterlockedIncrement(&_refCount));
        }

        // Destroys the object when the last reference is released.
        ULONG STDMETHODCALLTYPE Release() {
            const ULONG count = InterlockedDecrement(&_refCount);
            if (count == 0)
                delete this;
            return(count);
        }


        // ISAXContentHandler implementation: plain forwarding unless noted otherwise.
    public:
        STDMETHODIMP raw_putDocumentLocator(struct ISAXLocator *pLocator) {
            return(_pTarget->raw_putDocumentLocator(pLocator));
        }

        STDMETHODIMP raw_startDocument() {
            return(_pTarget->raw_startDocument());
        }

        STDMETHODIMP raw_endDocument() {
            return(_pTarget->raw_endDocument());
        }

        STDMETHODIMP raw_startPrefixMapping(unsigned short *pwchPrefix, int cchPrefix, unsigned short *pwchUri, int cchUri) {
            return(_pTarget->raw_startPrefixMapping(pwchPrefix, cchPrefix, pwchUri, cchUri));
        }

        STDMETHODIMP raw_endPrefixMapping(unsigned short *pwchPrefix, int cchPrefix) {
            return(_pTarget->raw_endPrefixMapping(pwchPrefix, cchPrefix));
        }

        STDMETHODIMP raw_startElement(unsigned short *pwchNamespaceUri, int cchNamespaceUri, unsigned short *pwchLocalName, int cchLocalName, unsigned short *pwchQName, int cchQName, struct ISAXAttributes *pAttributes) {
            return(_pTarget->raw_startElement(pwchNamespaceUri, cchNamespaceUri, pwchLocalName, cchLocalName, pwchQName, cchQName, pAttributes));
        }

        STDMETHODIMP raw_endElement(unsigned short *pwchNamespaceUri, int cchNamespaceUri, unsigned short *pwchLocalName, int cchLocalName, unsigned short *pwchQName, int cchQName) {
            return(_pTarget->raw_endElement(pwchNamespaceUri, cchNamespaceUri, pwchLocalName, cchLocalName, pwchQName, cchQName));
        }

        // Forwards the character run, or - when it is longer than the threshold
        // and a lexical handler is available - emits a comment in its place.
        STDMETHODIMP raw_characters(unsigned short *pwchChars, int cchChars) {
            const bool skip = _charactersSkipThreshold < cchChars && _pSAXLexicalHandler;
            if (!skip) {
                return(_pTarget->raw_characters(pwchChars, cchChars));
            }

            CStringW skipped;
            skipped.Format(L" %d characters have been skipped ", cchChars);

            // Use the raw (non-throwing) method, like every other forward in this
            // class: this function is a COM callback and must report failure via
            // its HRESULT rather than let a C++ exception escape to the caller.
            const int cchSkipped = skipped.GetLength();
            const HRESULT hr = _pSAXLexicalHandler->raw_comment(reinterpret_cast<unsigned short *>(skipped.LockBuffer()), cchSkipped);
            skipped.UnlockBuffer();
            return(hr);
        }

        STDMETHODIMP raw_ignorableWhitespace(unsigned short *pwchChars, int cchChars) {
            return(_pTarget->raw_ignorableWhitespace(pwchChars, cchChars));
        }

        STDMETHODIMP raw_processingInstruction(unsigned short *pwchTarget, int cchTarget, unsigned short *pwchData, int cchData) {
            return(_pTarget->raw_processingInstruction(pwchTarget, cchTarget, pwchData, cchData));
        }

        STDMETHODIMP raw_skippedEntity(unsigned short *pwchName, int cchName) {
            return(_pTarget->raw_skippedEntity(pwchName, cchName));
        }

    public:
        // Sets the handler used to emit the replacement comments. Must not be null.
        void SetLexicalHandler(const MSXML6::ISAXLexicalHandlerPtr &pSAXLexicalHandler) {
            ASSERT(pSAXLexicalHandler);

            _pSAXLexicalHandler = pSAXLexicalHandler;
        }

        // Character runs strictly longer than charactersSkipThreshold are replaced
        // by a comment. Call SetLexicalHandler first; without a lexical handler
        // nothing is skipped.
        void SetCharactersSkipThreshold(int charactersSkipThreshold) {
            ASSERT(_pSAXLexicalHandler);

            _charactersSkipThreshold = charactersSkipThreshold;
        }

    private:
        const MSXML6::ISAXContentHandlerPtr _pTarget;
        LONG _refCount;
        // Filter options
        ISAXLexicalHandlerPtr _pSAXLexicalHandler;
        int _charactersSkipThreshold;

    };

}

// Serializes pSrc as indented XML without the XML declaration.
//
// The node is parsed by a SAX reader whose content events pass through a
// SAXContentHandlerFilter into an MXXMLWriter. Text runs longer than
// charactersSkipThreshold characters are replaced by a comment.
//
// Returns "<NULL/>" for a null pSrc. If any step of building or running the
// pipeline reports failure, the node's plain `xml` text is returned instead.
_bstr_t XMLUtility::PrettyPrint(const MSXML6::IXMLDOMNodePtr &pSrc) {
    ASSERT(pSrc);
    if (!pSrc) {
        return(L"<NULL/>");
    }

    using namespace MSXML6;

    // Text runs longer than this many characters are replaced by a comment.
    const int charactersSkipThreshold = 200;

    HRESULT hr = S_OK;

    IMXWriterPtr pMXWriter;
    if (FAILED(hr = pMXWriter.CreateInstance(__uuidof(MXXMLWriter60)))) {
        return(pSrc->xml);
    }

    // These are VARIANT_BOOL properties: VARIANT_TRUE is -1, whereas a C++
    // `true` would be converted to 1.
    pMXWriter->indent = VARIANT_TRUE;
    pMXWriter->omitXMLDeclaration = VARIANT_TRUE;

    // The writer implements all SAX handler interfaces; query each one.
    const ISAXContentHandlerPtr pSAXContentHandler = pMXWriter;
    const ISAXErrorHandlerPtr pSAXErrorHandler = pMXWriter;
    const ISAXDTDHandlerPtr pSAXDTDHandler = pMXWriter;
    const ISAXLexicalHandlerPtr pSAXLexicalHandler = pMXWriter;
    const ISAXDeclHandlerPtr pSAXDeclHandler = pMXWriter;
    if (!pSAXContentHandler || !pSAXErrorHandler || !pSAXDTDHandler || !pSAXLexicalHandler || !pSAXDeclHandler) {
        return(pSrc->xml);
    }

    ISAXXMLReaderPtr pSAXReader;
    if (FAILED(hr = pSAXReader.CreateInstance(__uuidof(SAXXMLReader60)))) {
        return(pSrc->xml);
    }

    // The smart pointer takes the first reference and releases the filter on scope exit.
    SAXContentHandlerFilter * const pFilter = new SAXContentHandlerFilter(pSAXContentHandler);
    const ISAXContentHandlerPtr pContentHandlerProxy(pFilter);
    pFilter->SetLexicalHandler(pSAXLexicalHandler);
    pFilter->SetCharactersSkipThreshold(charactersSkipThreshold);

    // Use the raw_* methods: the #import error-handling wrappers throw
    // _com_error on failure, which would bypass the FAILED() checks and the
    // fallback below.
    if    (FAILED(hr = pSAXReader->raw_putContentHandler(pContentHandlerProxy))
            || FAILED(hr = pSAXReader->raw_putDTDHandler(pSAXDTDHandler))
            || FAILED(hr = pSAXReader->raw_putErrorHandler(pSAXErrorHandler))
            || FAILED(hr = pSAXReader->raw_putProperty(reinterpret_cast<unsigned short *>(L"http://xml.org/sax/properties/lexical-handler"), _variant_t(pSAXLexicalHandler.GetInterfacePtr())))
            || FAILED(hr = pSAXReader->raw_putProperty(reinterpret_cast<unsigned short *>(L"http://xml.org/sax/properties/declaration-handler"), _variant_t(pSAXDeclHandler.GetInterfacePtr())))) {
        return(pSrc->xml);
    }

    if    (FAILED(hr = pSAXReader->raw_parse(_variant_t(pSrc.GetInterfacePtr())))) {
        return(pSrc->xml);
    }

    return(pMXWriter->output);
}

Кроме форматирования я делаю ещё некоторую фильтрацию: слишком большие текстовые фрагменты (длиннее 200 символов) выбрасываются и заменяются комментарием с количеством пропущенных символов.

0 коммент.:

Отправить комментарий

Copyright 2007-2011 Chabster