pugixml.cpp 333 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
302213023130241302513026130271302813029130301303113032130331303413035130361303713038130391304013041130421304313044130451304613047130481304913050130511305213053130541305513056130571305813059130601306113062130631306413065130661306713068130691307013071130721307313074130751307613077130781307913080130811308213083130841308513086130871308813089130901309113092130931309413095130961309713098130991310013101131021310313104131051310613107131081310913110131111311213113131141311513116131171311813119131201312113122131231312413125131261312713128131291313013131131321313313134131351313613137131381313913140131411314213143131441314513146131471314813149131501315113152131531315413155131561315713158
  1. /**
  2. * pugixml parser - version 1.13
  3. * --------------------------------------------------------
  4. * Copyright (C) 2006-2022, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
  5. * Report bugs and download new versions at https://pugixml.org/
  6. *
  7. * This library is distributed under the MIT License. See notice at the end
  8. * of this file.
  9. *
  10. * This work is based on the pugxml parser, which is:
  11. * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
  12. */
  13. #ifndef SOURCE_PUGIXML_CPP
  14. #define SOURCE_PUGIXML_CPP
  15. #include "pugixml.hpp"
  16. #include <stdlib.h>
  17. #include <stdio.h>
  18. #include <string.h>
  19. #include <assert.h>
  20. #include <limits.h>
  21. #ifdef PUGIXML_WCHAR_MODE
  22. # include <wchar.h>
  23. #endif
  24. #ifndef PUGIXML_NO_XPATH
  25. # include <math.h>
  26. # include <float.h>
  27. #endif
  28. #ifndef PUGIXML_NO_STL
  29. # include <istream>
  30. # include <ostream>
  31. # include <string>
  32. #endif
  33. // For placement new
  34. #include <new>
  35. #ifdef _MSC_VER
  36. # pragma warning(push)
  37. # pragma warning(disable: 4127) // conditional expression is constant
  38. # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
  39. # pragma warning(disable: 4702) // unreachable code
  40. # pragma warning(disable: 4996) // this function or variable may be unsafe
  41. #endif
  42. #if defined(_MSC_VER) && defined(__c2__)
  43. # pragma clang diagnostic push
  44. # pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe
  45. #endif
  46. #ifdef __INTEL_COMPILER
  47. # pragma warning(disable: 177) // function was declared but never referenced
  48. # pragma warning(disable: 279) // controlling expression is constant
  49. # pragma warning(disable: 1478 1786) // function was declared "deprecated"
  50. # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
  51. #endif
  52. #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
  53. # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
  54. #endif
  55. #ifdef __BORLANDC__
  56. # pragma option push
  57. # pragma warn -8008 // condition is always false
  58. # pragma warn -8066 // unreachable code
  59. #endif
  60. #ifdef __SNC__
  61. // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
  62. # pragma diag_suppress=178 // function was declared but never referenced
  63. # pragma diag_suppress=237 // controlling expression is constant
  64. #endif
  65. #ifdef __TI_COMPILER_VERSION__
  66. # pragma diag_suppress 179 // function was declared but never referenced
  67. #endif
  // Inlining controls.
  // PUGI__NO_INLINE is a function decoration asking the compiler not to inline the function.
  // It uses the MSVC spelling when _MSC_VER >= 1300, the GCC-style attribute when __GNUC__ is
  // defined, and expands to nothing on any other compiler.
  #if defined(_MSC_VER) && _MSC_VER >= 1300
  #   define PUGI__NO_INLINE __declspec(noinline)
  #elif defined(__GNUC__)
  #   define PUGI__NO_INLINE __attribute__((noinline))
  #else
  #   define PUGI__NO_INLINE
  #endif
  // Branch weight controls.
  // PUGI__UNLIKELY(cond) evaluates to cond. When __GNUC__ is defined (and __c2__ is not) it also
  // passes cond through __builtin_expect with an expected value of 0; otherwise it is a plain
  // parenthesized expression.
  #if !defined(__GNUC__) || defined(__c2__)
  #   define PUGI__UNLIKELY(cond) (cond)
  #else
  #   define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
  #endif
  // Simple static assertion.
  // A false condition gives the array a size of -1, which is rejected at compile time.
  // The macro expands to a braced block, so it is used where a statement is allowed.
  #define PUGI__STATIC_ASSERT(cond) \
      { \
          static const char condition_failed[(cond) ? 1 : -1] = {0}; \
          (void)condition_failed[0]; \
      }
  // Digital Mars C++ bug workaround for passing char loaded from memory via stack.
  // PUGI__DMC_VOLATILE expands to `volatile` when __DMC__ is defined and to nothing otherwise.
  #if !defined(__DMC__)
  #   define PUGI__DMC_VOLATILE
  #else
  #   define PUGI__DMC_VOLATILE volatile
  #endif
  // Integer sanitizer workaround.
  // PUGI__UNSIGNED_OVERFLOW decorates functions that should be exempt from the
  // unsigned-integer-overflow sanitizer check. We only apply this for clang, since gcc8 has
  // no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings.
  #if defined(__clang__) && defined(__has_attribute)
  #   if __has_attribute(no_sanitize)
  #       define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow")))
  #   endif
  #endif
  // Every other configuration gets an empty decoration
  #ifndef PUGI__UNSIGNED_OVERFLOW
  #   define PUGI__UNSIGNED_OVERFLOW
  #endif
  // Borland C++ bug workaround: depending on header include order, ::memcpy and friends may not be
  // defined, so they are pulled in from std explicitly. (We can't always use std::memcpy because
  // some compilers don't have it at all.)
  #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
  using std::memset;
  using std::memmove;
  using std::memcpy;
  #endif
  // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX
  // definitions from limits.h in some configurations. When long long support is requested and
  // none of the three is available, derive them from the compiler's __LONG_LONG_MAX__.
  #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
  #   define LLONG_MAX __LONG_LONG_MAX__
  #   define LLONG_MIN (-LLONG_MAX - 1LL)
  #   define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
  #endif
  // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features.
  // PUGI__MSVC_CRT_VERSION is pinned to 1310 when _WIN32_WCE is defined, mirrors _MSC_VER for
  // MSVC builds without __S3E__, and is left undefined in every other configuration.
  #if defined(_WIN32_WCE)
  #   define PUGI__MSVC_CRT_VERSION 1310 // MSVC7.1
  #elif defined(_MSC_VER) && !defined(__S3E__)
  #   define PUGI__MSVC_CRT_VERSION _MSC_VER
  #endif
  // Not all platforms have snprintf; PUGI__SNPRINTF is a wrapper that uses a bounded variant when
  // one is available and falls back to plain sprintf otherwise.
  // The bounded variants take the limit from sizeof(buf)/_countof(buf), so this only works with
  // buffers with a known size (arrays, not pointers).
  #if __cplusplus >= 201103
  #   define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__)
  #elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
  #   define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__)
  #else
  #   define PUGI__SNPRINTF sprintf
  #endif
  // We put implementation details into an anonymous namespace in source mode, but have to keep it
  // in non-anonymous namespace in header-only mode to prevent binary bloat.
  // MSVC6 (_MSC_VER < 1300) seems to have an amusing bug with anonymous namespaces inside
  // namespaces, so it also gets the non-anonymous variant.
  #if defined(PUGIXML_HEADER_ONLY) || (defined(_MSC_VER) && _MSC_VER < 1300)
  #   define PUGI__NS_BEGIN namespace pugi { namespace impl {
  #   define PUGI__NS_END } }
  #else
  #   define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
  #   define PUGI__NS_END } } }
  #endif
  // Function decorations: everything is inline in header-only mode; in source mode regular
  // functions carry no decoration and the no-inline flavour maps to PUGI__NO_INLINE.
  #if defined(PUGIXML_HEADER_ONLY)
  #   define PUGI__FN inline
  #   define PUGI__FN_NO_INLINE inline
  #else
  #   define PUGI__FN
  #   define PUGI__FN_NO_INLINE PUGI__NO_INLINE
  #endif
  // uintptr_t and fixed-width unsigned integers.
  // Normally these come from <stdint.h>; for MSVC with _MSC_VER < 1600 and Borland with
  // __BORLANDC__ < 0x561 they are declared by hand inside namespace pugi instead.
  #if !((defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561))
  #   include <stdint.h>
  #else
  namespace pugi
  {
  #   ifndef _UINTPTR_T_DEFINED
      typedef size_t uintptr_t;
  #   endif

      typedef unsigned __int8 uint8_t;
      typedef unsigned __int16 uint16_t;
      typedef unsigned __int32 uint32_t;
  }
  #endif
  157. // Memory allocation
  158. PUGI__NS_BEGIN
  159. PUGI__FN void* default_allocate(size_t size)
  160. {
  161. return malloc(size);
  162. }
  163. PUGI__FN void default_deallocate(void* ptr)
  164. {
  165. free(ptr);
  166. }
  167. template <typename T>
  168. struct xml_memory_management_function_storage
  169. {
  170. static allocation_function allocate;
  171. static deallocation_function deallocate;
  172. };
  173. // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
  174. // Without a template<> we'll get multiple definitions of the same static
  175. template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
  176. template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
  177. typedef xml_memory_management_function_storage<int> xml_memory;
  178. PUGI__NS_END
  179. // String utilities
  180. PUGI__NS_BEGIN
  181. // Get string length
  182. PUGI__FN size_t strlength(const char_t* s)
  183. {
  184. assert(s);
  185. #ifdef PUGIXML_WCHAR_MODE
  186. return wcslen(s);
  187. #else
  188. return strlen(s);
  189. #endif
  190. }
  191. // Compare two strings
  192. PUGI__FN bool strequal(const char_t* src, const char_t* dst)
  193. {
  194. assert(src && dst);
  195. #ifdef PUGIXML_WCHAR_MODE
  196. return wcscmp(src, dst) == 0;
  197. #else
  198. return strcmp(src, dst) == 0;
  199. #endif
  200. }
  201. // Compare lhs with [rhs_begin, rhs_end)
  202. PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
  203. {
  204. for (size_t i = 0; i < count; ++i)
  205. if (lhs[i] != rhs[i])
  206. return false;
  207. return lhs[count] == 0;
  208. }
  209. // Get length of wide string, even if CRT lacks wide character support
  210. PUGI__FN size_t strlength_wide(const wchar_t* s)
  211. {
  212. assert(s);
  213. #ifdef PUGIXML_WCHAR_MODE
  214. return wcslen(s);
  215. #else
  216. const wchar_t* end = s;
  217. while (*end) end++;
  218. return static_cast<size_t>(end - s);
  219. #endif
  220. }
  221. PUGI__NS_END
  222. // auto_ptr-like object for exception recovery
  223. PUGI__NS_BEGIN
  // Scope guard that pairs a raw pointer with the function used to dispose of it.
  // The destructor calls the deleter on the pointer unless the pointer is null;
  // release() hands the pointer back to the caller and clears the stored copy so the
  // destructor does nothing afterwards.
  template <typename T> struct auto_deleter
  {
      // Signature of the disposal function
      typedef void (*D)(T*);

      T* data;
      D deleter;

      auto_deleter(T* ptr, D fn): data(ptr), deleter(fn)
      {
      }

      ~auto_deleter()
      {
          if (!data)
              return;

          deleter(data);
      }

      // Give up ownership: returns the stored pointer and leaves the guard empty
      T* release()
      {
          T* const owned = data;

          data = 0;

          return owned;
      }
  };
  243. PUGI__NS_END
  244. #ifdef PUGIXML_COMPACT
  245. PUGI__NS_BEGIN
  246. class compact_hash_table
  247. {
  248. public:
  249. compact_hash_table(): _items(0), _capacity(0), _count(0)
  250. {
  251. }
  252. void clear()
  253. {
  254. if (_items)
  255. {
  256. xml_memory::deallocate(_items);
  257. _items = 0;
  258. _capacity = 0;
  259. _count = 0;
  260. }
  261. }
  262. void* find(const void* key)
  263. {
  264. if (_capacity == 0) return 0;
  265. item_t* item = get_item(key);
  266. assert(item);
  267. assert(item->key == key || (item->key == 0 && item->value == 0));
  268. return item->value;
  269. }
  270. void insert(const void* key, void* value)
  271. {
  272. assert(_capacity != 0 && _count < _capacity - _capacity / 4);
  273. item_t* item = get_item(key);
  274. assert(item);
  275. if (item->key == 0)
  276. {
  277. _count++;
  278. item->key = key;
  279. }
  280. item->value = value;
  281. }
  282. bool reserve(size_t extra = 16)
  283. {
  284. if (_count + extra >= _capacity - _capacity / 4)
  285. return rehash(_count + extra);
  286. return true;
  287. }
  288. private:
  289. struct item_t
  290. {
  291. const void* key;
  292. void* value;
  293. };
  294. item_t* _items;
  295. size_t _capacity;
  296. size_t _count;
  297. bool rehash(size_t count);
  298. item_t* get_item(const void* key)
  299. {
  300. assert(key);
  301. assert(_capacity > 0);
  302. size_t hashmod = _capacity - 1;
  303. size_t bucket = hash(key) & hashmod;
  304. for (size_t probe = 0; probe <= hashmod; ++probe)
  305. {
  306. item_t& probe_item = _items[bucket];
  307. if (probe_item.key == key || probe_item.key == 0)
  308. return &probe_item;
  309. // hash collision, quadratic probing
  310. bucket = (bucket + probe + 1) & hashmod;
  311. }
  312. assert(false && "Hash table is full"); // unreachable
  313. return 0;
  314. }
  315. static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key)
  316. {
  317. unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key) & 0xffffffff);
  318. // MurmurHash3 32-bit finalizer
  319. h ^= h >> 16;
  320. h *= 0x85ebca6bu;
  321. h ^= h >> 13;
  322. h *= 0xc2b2ae35u;
  323. h ^= h >> 16;
  324. return h;
  325. }
  326. };
  327. PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count)
  328. {
  329. size_t capacity = 32;
  330. while (count >= capacity - capacity / 4)
  331. capacity *= 2;
  332. compact_hash_table rt;
  333. rt._capacity = capacity;
  334. rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity));
  335. if (!rt._items)
  336. return false;
  337. memset(rt._items, 0, sizeof(item_t) * capacity);
  338. for (size_t i = 0; i < _capacity; ++i)
  339. if (_items[i].key)
  340. rt.insert(_items[i].key, _items[i].value);
  341. if (_items)
  342. xml_memory::deallocate(_items);
  343. _capacity = capacity;
  344. _items = rt._items;
  345. assert(_count == rt._count);
  346. return true;
  347. }
  348. PUGI__NS_END
  349. #endif
  350. PUGI__NS_BEGIN
  351. #ifdef PUGIXML_COMPACT
  352. static const uintptr_t xml_memory_block_alignment = 4;
  353. #else
  354. static const uintptr_t xml_memory_block_alignment = sizeof(void*);
  355. #endif
  356. // extra metadata bits
  357. static const uintptr_t xml_memory_page_contents_shared_mask = 64;
  358. static const uintptr_t xml_memory_page_name_allocated_mask = 32;
  359. static const uintptr_t xml_memory_page_value_allocated_mask = 16;
  360. static const uintptr_t xml_memory_page_type_mask = 15;
  361. // combined masks for string uniqueness
  362. static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
  363. static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
  364. #ifdef PUGIXML_COMPACT
  365. #define PUGI__GETHEADER_IMPL(object, page, flags) // unused
  366. #define PUGI__GETPAGE_IMPL(header) (header).get_page()
  367. #else
  368. #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
  369. // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
  370. #define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8))))
  371. #endif
  372. #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
  373. #define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
  374. struct xml_allocator;
  375. struct xml_memory_page
  376. {
  377. static xml_memory_page* construct(void* memory)
  378. {
  379. xml_memory_page* result = static_cast<xml_memory_page*>(memory);
  380. result->allocator = 0;
  381. result->prev = 0;
  382. result->next = 0;
  383. result->busy_size = 0;
  384. result->freed_size = 0;
  385. #ifdef PUGIXML_COMPACT
  386. result->compact_string_base = 0;
  387. result->compact_shared_parent = 0;
  388. result->compact_page_marker = 0;
  389. #endif
  390. return result;
  391. }
  392. xml_allocator* allocator;
  393. xml_memory_page* prev;
  394. xml_memory_page* next;
  395. size_t busy_size;
  396. size_t freed_size;
  397. #ifdef PUGIXML_COMPACT
  398. char_t* compact_string_base;
  399. void* compact_shared_parent;
  400. uint32_t* compact_page_marker;
  401. #endif
  402. };
  403. static const size_t xml_memory_page_size =
  404. #ifdef PUGIXML_MEMORY_PAGE_SIZE
  405. (PUGIXML_MEMORY_PAGE_SIZE)
  406. #else
  407. 32768
  408. #endif
  409. - sizeof(xml_memory_page);
  410. struct xml_memory_string_header
  411. {
  412. uint16_t page_offset; // offset from page->data
  413. uint16_t full_size; // 0 if string occupies whole page
  414. };
  415. struct xml_allocator
  416. {
  417. xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
  418. {
  419. #ifdef PUGIXML_COMPACT
  420. _hash = 0;
  421. #endif
  422. }
  423. xml_memory_page* allocate_page(size_t data_size)
  424. {
  425. size_t size = sizeof(xml_memory_page) + data_size;
  426. // allocate block with some alignment, leaving memory for worst-case padding
  427. void* memory = xml_memory::allocate(size);
  428. if (!memory) return 0;
  429. // prepare page structure
  430. xml_memory_page* page = xml_memory_page::construct(memory);
  431. assert(page);
  432. assert(this == _root->allocator);
  433. page->allocator = this;
  434. return page;
  435. }
  436. static void deallocate_page(xml_memory_page* page)
  437. {
  438. xml_memory::deallocate(page);
  439. }
  440. void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
  441. void* allocate_memory(size_t size, xml_memory_page*& out_page)
  442. {
  443. if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
  444. return allocate_memory_oob(size, out_page);
  445. void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
  446. _busy_size += size;
  447. out_page = _root;
  448. return buf;
  449. }
  450. #ifdef PUGIXML_COMPACT
  451. void* allocate_object(size_t size, xml_memory_page*& out_page)
  452. {
  453. void* result = allocate_memory(size + sizeof(uint32_t), out_page);
  454. if (!result) return 0;
  455. // adjust for marker
  456. ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
  457. if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
  458. {
  459. // insert new marker
  460. uint32_t* marker = static_cast<uint32_t*>(result);
  461. *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
  462. out_page->compact_page_marker = marker;
  463. // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
  464. // this will make sure deallocate_memory correctly tracks the size
  465. out_page->freed_size += sizeof(uint32_t);
  466. return marker + 1;
  467. }
  468. else
  469. {
  470. // roll back uint32_t part
  471. _busy_size -= sizeof(uint32_t);
  472. return result;
  473. }
  474. }
  475. #else
  476. void* allocate_object(size_t size, xml_memory_page*& out_page)
  477. {
  478. return allocate_memory(size, out_page);
  479. }
  480. #endif
  481. void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
  482. {
  483. if (page == _root) page->busy_size = _busy_size;
  484. assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
  485. (void)!ptr;
  486. page->freed_size += size;
  487. assert(page->freed_size <= page->busy_size);
  488. if (page->freed_size == page->busy_size)
  489. {
  490. if (page->next == 0)
  491. {
  492. assert(_root == page);
  493. // top page freed, just reset sizes
  494. page->busy_size = 0;
  495. page->freed_size = 0;
  496. #ifdef PUGIXML_COMPACT
  497. // reset compact state to maximize efficiency
  498. page->compact_string_base = 0;
  499. page->compact_shared_parent = 0;
  500. page->compact_page_marker = 0;
  501. #endif
  502. _busy_size = 0;
  503. }
  504. else
  505. {
  506. assert(_root != page);
  507. assert(page->prev);
  508. // remove from the list
  509. page->prev->next = page->next;
  510. page->next->prev = page->prev;
  511. // deallocate
  512. deallocate_page(page);
  513. }
  514. }
  515. }
  516. char_t* allocate_string(size_t length)
  517. {
  518. static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
  519. PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
  520. // allocate memory for string and header block
  521. size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
  522. // round size up to block alignment boundary
  523. size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
  524. xml_memory_page* page;
  525. xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
  526. if (!header) return 0;
  527. // setup header
  528. ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
  529. assert(page_offset % xml_memory_block_alignment == 0);
  530. assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
  531. header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
  532. // full_size == 0 for large strings that occupy the whole page
  533. assert(full_size % xml_memory_block_alignment == 0);
  534. assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
  535. header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
  536. // round-trip through void* to avoid 'cast increases required alignment of target type' warning
  537. // header is guaranteed a pointer-sized alignment, which should be enough for char_t
  538. return static_cast<char_t*>(static_cast<void*>(header + 1));
  539. }
  540. void deallocate_string(char_t* string)
  541. {
  542. // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
  543. // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
  544. // get header
  545. xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
  546. assert(header);
  547. // deallocate
  548. size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
  549. xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
  550. // if full_size == 0 then this string occupies the whole page
  551. size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
  552. deallocate_memory(header, full_size, page);
  553. }
  554. bool reserve()
  555. {
  556. #ifdef PUGIXML_COMPACT
  557. return _hash->reserve();
  558. #else
  559. return true;
  560. #endif
  561. }
  562. xml_memory_page* _root;
  563. size_t _busy_size;
  564. #ifdef PUGIXML_COMPACT
  565. compact_hash_table* _hash;
  566. #endif
  567. };
  568. PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
  569. {
  570. const size_t large_allocation_threshold = xml_memory_page_size / 4;
  571. xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
  572. out_page = page;
  573. if (!page) return 0;
  574. if (size <= large_allocation_threshold)
  575. {
  576. _root->busy_size = _busy_size;
  577. // insert page at the end of linked list
  578. page->prev = _root;
  579. _root->next = page;
  580. _root = page;
  581. _busy_size = size;
  582. }
  583. else
  584. {
  585. // insert page before the end of linked list, so that it is deleted as soon as possible
  586. // the last page is not deleted even if it's empty (see deallocate_memory)
  587. assert(_root->prev);
  588. page->prev = _root->prev;
  589. page->next = _root;
  590. _root->prev->next = page;
  591. _root->prev = page;
  592. page->busy_size = size;
  593. }
  594. return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
  595. }
  596. PUGI__NS_END
  597. #ifdef PUGIXML_COMPACT
  598. PUGI__NS_BEGIN
  599. static const uintptr_t compact_alignment_log2 = 2;
  600. static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
  601. class compact_header
  602. {
  603. public:
  604. compact_header(xml_memory_page* page, unsigned int flags)
  605. {
  606. PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
  607. ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
  608. assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
  609. _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
  610. _flags = static_cast<unsigned char>(flags);
  611. }
  612. void operator&=(uintptr_t mod)
  613. {
  614. _flags &= static_cast<unsigned char>(mod);
  615. }
  616. void operator|=(uintptr_t mod)
  617. {
  618. _flags |= static_cast<unsigned char>(mod);
  619. }
  620. uintptr_t operator&(uintptr_t mod) const
  621. {
  622. return _flags & mod;
  623. }
  624. xml_memory_page* get_page() const
  625. {
  626. // round-trip through void* to silence 'cast increases required alignment of target type' warnings
  627. const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
  628. const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
  629. return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
  630. }
  631. private:
  632. unsigned char _page;
  633. unsigned char _flags;
  634. };
  635. PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
  636. {
  637. const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
  638. return header->get_page();
  639. }
  640. template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
  641. {
  642. return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object));
  643. }
  644. template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
  645. {
  646. compact_get_page(object, header_offset)->allocator->_hash->insert(object, value);
  647. }
  648. template <typename T, int header_offset, int start = -126> class compact_pointer
  649. {
  650. public:
  651. compact_pointer(): _data(0)
  652. {
  653. }
  654. void operator=(const compact_pointer& rhs)
  655. {
  656. *this = rhs + 0;
  657. }
  658. void operator=(T* value)
  659. {
  660. if (value)
  661. {
  662. // value is guaranteed to be compact-aligned; 'this' is not
  663. // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
  664. // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
  665. // compensate for arithmetic shift rounding for negative values
  666. ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
  667. ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
  668. if (static_cast<uintptr_t>(offset) <= 253)
  669. _data = static_cast<unsigned char>(offset + 1);
  670. else
  671. {
  672. compact_set_value<header_offset>(this, value);
  673. _data = 255;
  674. }
  675. }
  676. else
  677. _data = 0;
  678. }
  679. operator T*() const
  680. {
  681. if (_data)
  682. {
  683. if (_data < 255)
  684. {
  685. uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
  686. return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment);
  687. }
  688. else
  689. return compact_get_value<header_offset, T>(this);
  690. }
  691. else
  692. return 0;
  693. }
  694. T* operator->() const
  695. {
  696. return *this;
  697. }
  698. private:
  699. unsigned char _data;
  700. };
  701. template <typename T, int header_offset> class compact_pointer_parent
  702. {
  703. public:
  704. compact_pointer_parent(): _data(0)
  705. {
  706. }
  707. void operator=(const compact_pointer_parent& rhs)
  708. {
  709. *this = rhs + 0;
  710. }
  711. void operator=(T* value)
  712. {
  713. if (value)
  714. {
  715. // value is guaranteed to be compact-aligned; 'this' is not
  716. // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
  717. // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
  718. // compensate for arithmetic shift behavior for negative values
  719. ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
  720. ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
  721. if (static_cast<uintptr_t>(offset) <= 65533)
  722. {
  723. _data = static_cast<unsigned short>(offset + 1);
  724. }
  725. else
  726. {
  727. xml_memory_page* page = compact_get_page(this, header_offset);
  728. if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
  729. page->compact_shared_parent = value;
  730. if (page->compact_shared_parent == value)
  731. {
  732. _data = 65534;
  733. }
  734. else
  735. {
  736. compact_set_value<header_offset>(this, value);
  737. _data = 65535;
  738. }
  739. }
  740. }
  741. else
  742. {
  743. _data = 0;
  744. }
  745. }
  746. operator T*() const
  747. {
  748. if (_data)
  749. {
  750. if (_data < 65534)
  751. {
  752. uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
  753. return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment);
  754. }
  755. else if (_data == 65534)
  756. return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
  757. else
  758. return compact_get_value<header_offset, T>(this);
  759. }
  760. else
  761. return 0;
  762. }
  763. T* operator->() const
  764. {
  765. return *this;
  766. }
  767. private:
  768. uint16_t _data;
  769. };
  770. template <int header_offset, int base_offset> class compact_string
  771. {
  772. public:
  773. compact_string(): _data(0)
  774. {
  775. }
  776. void operator=(const compact_string& rhs)
  777. {
  778. *this = rhs + 0;
  779. }
  780. void operator=(char_t* value)
  781. {
  782. if (value)
  783. {
  784. xml_memory_page* page = compact_get_page(this, header_offset);
  785. if (PUGI__UNLIKELY(page->compact_string_base == 0))
  786. page->compact_string_base = value;
  787. ptrdiff_t offset = value - page->compact_string_base;
  788. if (static_cast<uintptr_t>(offset) < (65535 << 7))
  789. {
  790. // round-trip through void* to silence 'cast increases required alignment of target type' warnings
  791. uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
  792. if (*base == 0)
  793. {
  794. *base = static_cast<uint16_t>((offset >> 7) + 1);
  795. _data = static_cast<unsigned char>((offset & 127) + 1);
  796. }
  797. else
  798. {
  799. ptrdiff_t remainder = offset - ((*base - 1) << 7);
  800. if (static_cast<uintptr_t>(remainder) <= 253)
  801. {
  802. _data = static_cast<unsigned char>(remainder + 1);
  803. }
  804. else
  805. {
  806. compact_set_value<header_offset>(this, value);
  807. _data = 255;
  808. }
  809. }
  810. }
  811. else
  812. {
  813. compact_set_value<header_offset>(this, value);
  814. _data = 255;
  815. }
  816. }
  817. else
  818. {
  819. _data = 0;
  820. }
  821. }
  822. operator char_t*() const
  823. {
  824. if (_data)
  825. {
  826. if (_data < 255)
  827. {
  828. xml_memory_page* page = compact_get_page(this, header_offset);
  829. // round-trip through void* to silence 'cast increases required alignment of target type' warnings
  830. const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
  831. assert(*base);
  832. ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
  833. return page->compact_string_base + offset;
  834. }
  835. else
  836. {
  837. return compact_get_value<header_offset, char_t>(this);
  838. }
  839. }
  840. else
  841. return 0;
  842. }
  843. private:
  844. unsigned char _data;
  845. };
  846. PUGI__NS_END
  847. #endif
  848. #ifdef PUGIXML_COMPACT
  849. namespace pugi
  850. {
  851. struct xml_attribute_struct
  852. {
  853. xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
  854. {
  855. PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
  856. }
  857. impl::compact_header header;
  858. uint16_t namevalue_base;
  859. impl::compact_string<4, 2> name;
  860. impl::compact_string<5, 3> value;
  861. impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
  862. impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
  863. };
  864. struct xml_node_struct
  865. {
  866. xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
  867. {
  868. PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
  869. }
  870. impl::compact_header header;
  871. uint16_t namevalue_base;
  872. impl::compact_string<4, 2> name;
  873. impl::compact_string<5, 3> value;
  874. impl::compact_pointer_parent<xml_node_struct, 6> parent;
  875. impl::compact_pointer<xml_node_struct, 8, 0> first_child;
  876. impl::compact_pointer<xml_node_struct, 9> prev_sibling_c;
  877. impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
  878. impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
  879. };
  880. }
  881. #else
  882. namespace pugi
  883. {
  884. struct xml_attribute_struct
  885. {
  886. xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
  887. {
  888. header = PUGI__GETHEADER_IMPL(this, page, 0);
  889. }
  890. uintptr_t header;
  891. char_t* name;
  892. char_t* value;
  893. xml_attribute_struct* prev_attribute_c;
  894. xml_attribute_struct* next_attribute;
  895. };
  896. struct xml_node_struct
  897. {
  898. xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
  899. {
  900. header = PUGI__GETHEADER_IMPL(this, page, type);
  901. }
  902. uintptr_t header;
  903. char_t* name;
  904. char_t* value;
  905. xml_node_struct* parent;
  906. xml_node_struct* first_child;
  907. xml_node_struct* prev_sibling_c;
  908. xml_node_struct* next_sibling;
  909. xml_attribute_struct* first_attribute;
  910. };
  911. }
  912. #endif
  913. PUGI__NS_BEGIN
  914. struct xml_extra_buffer
  915. {
  916. char_t* buffer;
  917. xml_extra_buffer* next;
  918. };
  919. struct xml_document_struct: public xml_node_struct, public xml_allocator
  920. {
  921. xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
  922. {
  923. }
  924. const char_t* buffer;
  925. xml_extra_buffer* extra_buffers;
  926. #ifdef PUGIXML_COMPACT
  927. compact_hash_table hash;
  928. #endif
  929. };
  930. template <typename Object> inline xml_allocator& get_allocator(const Object* object)
  931. {
  932. assert(object);
  933. return *PUGI__GETPAGE(object)->allocator;
  934. }
  935. template <typename Object> inline xml_document_struct& get_document(const Object* object)
  936. {
  937. assert(object);
  938. return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
  939. }
  940. PUGI__NS_END
  941. // Low-level DOM operations
  942. PUGI__NS_BEGIN
  943. inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
  944. {
  945. xml_memory_page* page;
  946. void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
  947. if (!memory) return 0;
  948. return new (memory) xml_attribute_struct(page);
  949. }
  950. inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
  951. {
  952. xml_memory_page* page;
  953. void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
  954. if (!memory) return 0;
  955. return new (memory) xml_node_struct(page, type);
  956. }
  957. inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
  958. {
  959. if (a->header & impl::xml_memory_page_name_allocated_mask)
  960. alloc.deallocate_string(a->name);
  961. if (a->header & impl::xml_memory_page_value_allocated_mask)
  962. alloc.deallocate_string(a->value);
  963. alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
  964. }
  965. inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
  966. {
  967. if (n->header & impl::xml_memory_page_name_allocated_mask)
  968. alloc.deallocate_string(n->name);
  969. if (n->header & impl::xml_memory_page_value_allocated_mask)
  970. alloc.deallocate_string(n->value);
  971. for (xml_attribute_struct* attr = n->first_attribute; attr; )
  972. {
  973. xml_attribute_struct* next = attr->next_attribute;
  974. destroy_attribute(attr, alloc);
  975. attr = next;
  976. }
  977. for (xml_node_struct* child = n->first_child; child; )
  978. {
  979. xml_node_struct* next = child->next_sibling;
  980. destroy_node(child, alloc);
  981. child = next;
  982. }
  983. alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
  984. }
  985. inline void append_node(xml_node_struct* child, xml_node_struct* node)
  986. {
  987. child->parent = node;
  988. xml_node_struct* head = node->first_child;
  989. if (head)
  990. {
  991. xml_node_struct* tail = head->prev_sibling_c;
  992. tail->next_sibling = child;
  993. child->prev_sibling_c = tail;
  994. head->prev_sibling_c = child;
  995. }
  996. else
  997. {
  998. node->first_child = child;
  999. child->prev_sibling_c = child;
  1000. }
  1001. }
  1002. inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
  1003. {
  1004. child->parent = node;
  1005. xml_node_struct* head = node->first_child;
  1006. if (head)
  1007. {
  1008. child->prev_sibling_c = head->prev_sibling_c;
  1009. head->prev_sibling_c = child;
  1010. }
  1011. else
  1012. child->prev_sibling_c = child;
  1013. child->next_sibling = head;
  1014. node->first_child = child;
  1015. }
  1016. inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
  1017. {
  1018. xml_node_struct* parent = node->parent;
  1019. child->parent = parent;
  1020. xml_node_struct* next = node->next_sibling;
  1021. if (next)
  1022. next->prev_sibling_c = child;
  1023. else
  1024. parent->first_child->prev_sibling_c = child;
  1025. child->next_sibling = next;
  1026. child->prev_sibling_c = node;
  1027. node->next_sibling = child;
  1028. }
  1029. inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
  1030. {
  1031. xml_node_struct* parent = node->parent;
  1032. child->parent = parent;
  1033. xml_node_struct* prev = node->prev_sibling_c;
  1034. if (prev->next_sibling)
  1035. prev->next_sibling = child;
  1036. else
  1037. parent->first_child = child;
  1038. child->prev_sibling_c = prev;
  1039. child->next_sibling = node;
  1040. node->prev_sibling_c = child;
  1041. }
  1042. inline void remove_node(xml_node_struct* node)
  1043. {
  1044. xml_node_struct* parent = node->parent;
  1045. xml_node_struct* next = node->next_sibling;
  1046. xml_node_struct* prev = node->prev_sibling_c;
  1047. if (next)
  1048. next->prev_sibling_c = prev;
  1049. else
  1050. parent->first_child->prev_sibling_c = prev;
  1051. if (prev->next_sibling)
  1052. prev->next_sibling = next;
  1053. else
  1054. parent->first_child = next;
  1055. node->parent = 0;
  1056. node->prev_sibling_c = 0;
  1057. node->next_sibling = 0;
  1058. }
  1059. inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
  1060. {
  1061. xml_attribute_struct* head = node->first_attribute;
  1062. if (head)
  1063. {
  1064. xml_attribute_struct* tail = head->prev_attribute_c;
  1065. tail->next_attribute = attr;
  1066. attr->prev_attribute_c = tail;
  1067. head->prev_attribute_c = attr;
  1068. }
  1069. else
  1070. {
  1071. node->first_attribute = attr;
  1072. attr->prev_attribute_c = attr;
  1073. }
  1074. }
  1075. inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
  1076. {
  1077. xml_attribute_struct* head = node->first_attribute;
  1078. if (head)
  1079. {
  1080. attr->prev_attribute_c = head->prev_attribute_c;
  1081. head->prev_attribute_c = attr;
  1082. }
  1083. else
  1084. attr->prev_attribute_c = attr;
  1085. attr->next_attribute = head;
  1086. node->first_attribute = attr;
  1087. }
  1088. inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
  1089. {
  1090. xml_attribute_struct* next = place->next_attribute;
  1091. if (next)
  1092. next->prev_attribute_c = attr;
  1093. else
  1094. node->first_attribute->prev_attribute_c = attr;
  1095. attr->next_attribute = next;
  1096. attr->prev_attribute_c = place;
  1097. place->next_attribute = attr;
  1098. }
  1099. inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
  1100. {
  1101. xml_attribute_struct* prev = place->prev_attribute_c;
  1102. if (prev->next_attribute)
  1103. prev->next_attribute = attr;
  1104. else
  1105. node->first_attribute = attr;
  1106. attr->prev_attribute_c = prev;
  1107. attr->next_attribute = place;
  1108. place->prev_attribute_c = attr;
  1109. }
  1110. inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
  1111. {
  1112. xml_attribute_struct* next = attr->next_attribute;
  1113. xml_attribute_struct* prev = attr->prev_attribute_c;
  1114. if (next)
  1115. next->prev_attribute_c = prev;
  1116. else
  1117. node->first_attribute->prev_attribute_c = prev;
  1118. if (prev->next_attribute)
  1119. prev->next_attribute = next;
  1120. else
  1121. node->first_attribute = next;
  1122. attr->prev_attribute_c = 0;
  1123. attr->next_attribute = 0;
  1124. }
  1125. PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
  1126. {
  1127. if (!alloc.reserve()) return 0;
  1128. xml_node_struct* child = allocate_node(alloc, type);
  1129. if (!child) return 0;
  1130. append_node(child, node);
  1131. return child;
  1132. }
  1133. PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
  1134. {
  1135. if (!alloc.reserve()) return 0;
  1136. xml_attribute_struct* attr = allocate_attribute(alloc);
  1137. if (!attr) return 0;
  1138. append_attribute(attr, node);
  1139. return attr;
  1140. }
  1141. PUGI__NS_END
  1142. // Helper classes for code generation
  1143. PUGI__NS_BEGIN
  // Compile-time "false" tag: opt_false::value == 0
  struct opt_false
  {
      enum
      {
          value = 0
      };
  };
  // Compile-time "true" tag: opt_true::value == 1
  struct opt_true
  {
      enum
      {
          value = 1
      };
  };
  1152. PUGI__NS_END
  1153. // Unicode utilities
  1154. PUGI__NS_BEGIN
  // Returns value with its two bytes exchanged.
  inline uint16_t endian_swap(uint16_t value)
  {
      const unsigned int low_byte = value & 0xff;
      const unsigned int high_byte = value >> 8;

      return static_cast<uint16_t>((low_byte << 8) | high_byte);
  }
  // Returns value with its four bytes in reverse order.
  inline uint32_t endian_swap(uint32_t value)
  {
      const uint32_t byte0 = value & 0xff;
      const uint32_t byte1 = (value >> 8) & 0xff;
      const uint32_t byte2 = (value >> 16) & 0xff;
      const uint32_t byte3 = value >> 24;

      return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
  }
  // Output traits that count how many UTF-8 code units a code point needs.
  struct utf8_counter
  {
      typedef size_t value_type;

      // Code points below U+10000: 1, 2 or 3 units.
      static value_type low(value_type result, uint32_t ch)
      {
          size_t units = 3;                // U+0800..U+FFFF

          if (ch < 0x80) units = 1;        // U+0000..U+007F
          else if (ch < 0x800) units = 2;  // U+0080..U+07FF

          return result + units;
      }

      // U+10000..U+10FFFF: always 4 units.
      static value_type high(value_type result, uint32_t)
      {
          return result + 4;
      }
  };
  // Output traits that encode code points as UTF-8 and return the advanced output pointer.
  struct utf8_writer
  {
      typedef uint8_t* value_type;

      // Encodes a code point below U+10000 using 1, 2 or 3 bytes.
      static value_type low(value_type result, uint32_t ch)
      {
          if (ch < 0x80)
          {
              // U+0000..U+007F
              *result++ = static_cast<uint8_t>(ch);
          }
          else if (ch < 0x800)
          {
              // U+0080..U+07FF
              *result++ = static_cast<uint8_t>(0xC0 | (ch >> 6));
              *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
          }
          else
          {
              // U+0800..U+FFFF
              *result++ = static_cast<uint8_t>(0xE0 | (ch >> 12));
              *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
              *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
          }

          return result;
      }

      // Encodes a code point in U+10000..U+10FFFF using 4 bytes.
      static value_type high(value_type result, uint32_t ch)
      {
          *result++ = static_cast<uint8_t>(0xF0 | (ch >> 18));
          *result++ = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
          *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
          *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));

          return result;
      }

      // Dispatches to low() or high() depending on the code point.
      static value_type any(value_type result, uint32_t ch)
      {
          if (ch < 0x10000)
              return low(result, ch);

          return high(result, ch);
      }
  };
  // Output traits that count UTF-16 code units: one per low() call, two (a surrogate pair) per high() call.
  struct utf16_counter
  {
      typedef size_t value_type;

      static value_type low(value_type result, uint32_t)
      {
          const size_t single_unit = 1;
          return result + single_unit;
      }

      static value_type high(value_type result, uint32_t)
      {
          const size_t surrogate_pair = 2;
          return result + surrogate_pair;
      }
  };
  // Output traits that encode code points as UTF-16 and return the advanced output pointer.
  struct utf16_writer
  {
      typedef uint16_t* value_type;

      // Stores a code point below U+10000 as a single unit.
      static value_type low(value_type result, uint32_t ch)
      {
          result[0] = static_cast<uint16_t>(ch);
          return result + 1;
      }

      // Stores a code point at or above U+10000 as a surrogate pair.
      static value_type high(value_type result, uint32_t ch)
      {
          const uint32_t offset = static_cast<uint32_t>(ch - 0x10000);
          const uint32_t top_bits = offset >> 10;
          const uint32_t bottom_bits = offset & 0x3ff;

          *result++ = static_cast<uint16_t>(0xD800 + top_bits);
          *result++ = static_cast<uint16_t>(0xDC00 + bottom_bits);

          return result;
      }

      // Dispatches to low() or high() depending on the code point.
      static value_type any(value_type result, uint32_t ch)
      {
          if (ch < 0x10000)
              return low(result, ch);

          return high(result, ch);
      }
  };
  // Output traits that count UTF-32 code units: every code point takes exactly one unit.
  struct utf32_counter
  {
      typedef size_t value_type;

      static value_type low(value_type result, uint32_t)
      {
          return 1 + result;
      }

      static value_type high(value_type result, uint32_t)
      {
          return 1 + result;
      }
  };
  // Output traits that store each code point as one UTF-32 unit and return the advanced output pointer.
  struct utf32_writer
  {
      typedef uint32_t* value_type;

      static value_type low(value_type result, uint32_t ch)
      {
          result[0] = ch;
          return result + 1;
      }

      static value_type high(value_type result, uint32_t ch)
      {
          result[0] = ch;
          return result + 1;
      }

      static value_type any(value_type result, uint32_t ch)
      {
          result[0] = ch;
          return result + 1;
      }
  };
  // Output traits that write one Latin-1 byte per code point; anything above 255 becomes '?'.
  struct latin1_writer
  {
      typedef uint8_t* value_type;

      static value_type low(value_type result, uint32_t ch)
      {
          const uint32_t mapped = (ch <= 255) ? ch : static_cast<uint32_t>('?');

          result[0] = static_cast<uint8_t>(mapped);
          return result + 1;
      }

      // Code points passed to high() never fit in a single byte.
      static value_type high(value_type result, uint32_t ch)
      {
          (void)ch;

          result[0] = '?';
          return result + 1;
      }
  };
  // Decodes UTF-8 input and feeds each decoded code point to an output traits type.
  struct utf8_decoder
  {
      typedef uint8_t type;

      // Walks size bytes starting at data. Code points decoded from 1-3 byte sequences go to
      // Traits::low, those decoded from 4-byte sequences go to Traits::high. Bytes that do not
      // begin a complete sequence with valid continuation bytes are skipped one at a time.
      // Returns the accumulated Traits result.
      template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
      {
          const uint8_t payload_mask = 0x3f;

          while (size)
          {
              const uint8_t lead = data[0];

              // 0xxxxxxx -> U+0000..U+007F
              if (lead < 0x80)
              {
                  result = Traits::low(result, lead);
                  ++data;
                  --size;

                  // the block-wise fast path below only runs from a 4-byte aligned address
                  if ((reinterpret_cast<uintptr_t>(data) & 3) != 0)
                      continue;

                  // process aligned single-byte (ascii) blocks
                  // round-trip through void* to silence 'cast increases required alignment of target type' warnings
                  while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
                  {
                      for (size_t i = 0; i < 4; ++i)
                          result = Traits::low(result, data[i]);

                      data += 4;
                      size -= 4;
                  }

                  continue;
              }

              // number of input bytes consumed by this iteration; stays 1 when the input is rejected
              size_t advance = 1;

              if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
              {
                  // 110xxxxx -> U+0080..U+07FF
                  result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & payload_mask));
                  advance = 2;
              }
              else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
              {
                  // 1110xxxx -> U+0800-U+FFFF
                  result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & payload_mask) << 6) | (data[2] & payload_mask));
                  advance = 3;
              }
              else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
              {
                  // 11110xxx -> U+10000..U+10FFFF
                  result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & payload_mask) << 12) | ((data[2] & payload_mask) << 6) | (data[3] & payload_mask));
                  advance = 4;
              }
              // otherwise: 10xxxxxx, 11111xxx, or an incomplete/malformed sequence -> skip the lead byte

              data += advance;
              size -= advance;
          }

          return result;
      }
  };
  1362. template <typename opt_swap> struct utf16_decoder
  1363. {
  1364. typedef uint16_t type;
  1365. template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
  1366. {
  1367. while (size)
  1368. {
  1369. uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
  1370. // U+0000..U+D7FF
  1371. if (lead < 0xD800)
  1372. {
  1373. result = Traits::low(result, lead);
  1374. data += 1;
  1375. size -= 1;
  1376. }
  1377. // U+E000..U+FFFF
  1378. else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
  1379. {
  1380. result = Traits::low(result, lead);
  1381. data += 1;
  1382. size -= 1;
  1383. }
  1384. // surrogate pair lead
  1385. else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
  1386. {
  1387. uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
  1388. if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
  1389. {
  1390. result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
  1391. data += 2;
  1392. size -= 2;
  1393. }
  1394. else
  1395. {
  1396. data += 1;
  1397. size -= 1;
  1398. }
  1399. }
  1400. else
  1401. {
  1402. data += 1;
  1403. size -= 1;
  1404. }
  1405. }
  1406. return result;
  1407. }
  1408. };
  1409. template <typename opt_swap> struct utf32_decoder
  1410. {
  1411. typedef uint32_t type;
  1412. template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
  1413. {
  1414. while (size)
  1415. {
  1416. uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
  1417. // U+0000..U+FFFF
  1418. if (lead < 0x10000)
  1419. {
  1420. result = Traits::low(result, lead);
  1421. data += 1;
  1422. size -= 1;
  1423. }
  1424. // U+10000..U+10FFFF
  1425. else
  1426. {
  1427. result = Traits::high(result, lead);
  1428. data += 1;
  1429. size -= 1;
  1430. }
  1431. }
  1432. return result;
  1433. }
  1434. };
  // Feeds every input byte, unchanged, to Traits::low (each Latin-1 byte is its own code point value).
  struct latin1_decoder
  {
      typedef uint8_t type;

      // Returns the accumulated Traits result after size bytes.
      template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
      {
          for (size_t i = 0; i < size; ++i)
              result = Traits::low(result, data[i]);

          return result;
      }
  };
  1449. template <size_t size> struct wchar_selector;
  1450. template <> struct wchar_selector<2>
  1451. {
  1452. typedef uint16_t type;
  1453. typedef utf16_counter counter;
  1454. typedef utf16_writer writer;
  1455. typedef utf16_decoder<opt_false> decoder;
  1456. };
  1457. template <> struct wchar_selector<4>
  1458. {
  1459. typedef uint32_t type;
  1460. typedef utf32_counter counter;
  1461. typedef utf32_writer writer;
  1462. typedef utf32_decoder<opt_false> decoder;
  1463. };
  1464. typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
  1465. typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
  1466. struct wchar_decoder
  1467. {
  1468. typedef wchar_t type;
  1469. template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
  1470. {
  1471. typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
  1472. return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
  1473. }
  1474. };
  1475. #ifdef PUGIXML_WCHAR_MODE
  1476. PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
  1477. {
  1478. for (size_t i = 0; i < length; ++i)
  1479. result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
  1480. }
  1481. #endif
  1482. PUGI__NS_END
  1483. PUGI__NS_BEGIN
  // Bit flags stored in chartype_table; each flag names a character class used by the parser
  enum chartype_t
  {
      ct_parse_pcdata  = 1 << 0, // \0, &, \r, <
      ct_parse_attr    = 1 << 1, // \0, &, \r, ', "
      ct_parse_attr_ws = 1 << 2, // \0, &, \r, ', ", \n, tab
      ct_space         = 1 << 3, // \r, \n, space, tab
      ct_parse_cdata   = 1 << 4, // \0, ], >, \r
      ct_parse_comment = 1 << 5, // \0, -, >, \r
      ct_symbol        = 1 << 6, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
      ct_start_symbol  = 1 << 7  // Any symbol > 127, a-z, A-Z, _, :
  };
  // Character class lookup table: one entry per byte value, each entry a bitwise OR of chartype_t flags.
  // Queried through PUGI__IS_CHARTYPE. Every entry from 128 up carries ct_symbol | ct_start_symbol (192).
  static const unsigned char chartype_table[256] =
  {
      55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
      8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
      64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
      0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
      0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
      192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
  };
  // Bit flags stored in chartypex_table (a second set of character classes)
  enum chartypex_t
  {
      ctx_special_pcdata = 1 << 0, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
      ctx_special_attr   = 1 << 1, // Any symbol >= 0 and < 32, &, <, ", '
      ctx_start_symbol   = 1 << 2, // Any symbol > 127, a-z, A-Z, _
      ctx_digit          = 1 << 3, // 0-9
      ctx_symbol         = 1 << 4  // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
  };
  // Character class lookup table: one entry per byte value, each entry a bitwise OR of chartypex_t flags.
  // Queried through PUGI__IS_CHARTYPEX. Every entry from 128 up carries ctx_start_symbol | ctx_symbol (20).
  static const unsigned char chartypex_table[256] =
  {
      3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, // 0-15
      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
      0, 0, 2, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 1, 0, // 48-63
      0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
      0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
      20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
  };
  // Character class test: evaluates to a nonzero value when the table entry for c has any of the ct flags set.
  // In wchar mode only characters below 128 index the table directly; every other character is
  // classified using table entry 128. In char mode c is converted to unsigned char and used as the index.
  #ifdef PUGIXML_WCHAR_MODE
  #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
  #else
  #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
  #endif
  // Test c against chartype_t flags (chartype_table) or chartypex_t flags (chartypex_table)
  #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
  #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
  1548. PUGI__FN bool is_little_endian()
  1549. {
  1550. unsigned int ui = 1;
  1551. return *reinterpret_cast<unsigned char*>(&ui) == 1;
  1552. }
  1553. PUGI__FN xml_encoding get_wchar_encoding()
  1554. {
  1555. PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
  1556. if (sizeof(wchar_t) == 2)
  1557. return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  1558. else
  1559. return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  1560. }
  1561. PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
  1562. {
  1563. #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
  1564. #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }
  1565. // check if we have a non-empty XML declaration
  1566. if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
  1567. return false;
  1568. // scan XML declaration until the encoding field
  1569. for (size_t i = 6; i + 1 < size; ++i)
  1570. {
  1571. // declaration can not contain ? in quoted values
  1572. if (data[i] == '?')
  1573. return false;
  1574. if (data[i] == 'e' && data[i + 1] == 'n')
  1575. {
  1576. size_t offset = i;
  1577. // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
  1578. PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
  1579. PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');
  1580. // S? = S?
  1581. PUGI__SCANCHARTYPE(ct_space);
  1582. PUGI__SCANCHAR('=');
  1583. PUGI__SCANCHARTYPE(ct_space);
  1584. // the only two valid delimiters are ' and "
  1585. uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
  1586. PUGI__SCANCHAR(delimiter);
  1587. size_t start = offset;
  1588. out_encoding = data + offset;
  1589. PUGI__SCANCHARTYPE(ct_symbol);
  1590. out_length = offset - start;
  1591. PUGI__SCANCHAR(delimiter);
  1592. return true;
  1593. }
  1594. }
  1595. return false;
  1596. #undef PUGI__SCANCHAR
  1597. #undef PUGI__SCANCHARTYPE
  1598. }
  1599. PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
  1600. {
  1601. // skip encoding autodetection if input buffer is too small
  1602. if (size < 4) return encoding_utf8;
  1603. uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
  1604. // look for BOM in first few bytes
  1605. if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
  1606. if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
  1607. if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
  1608. if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
  1609. if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
  1610. // look for <, <? or <?xm in various encodings
  1611. if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
  1612. if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
  1613. if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
  1614. if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
  1615. // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
  1616. if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
  1617. if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
  1618. // no known BOM detected; parse declaration
  1619. const uint8_t* enc = 0;
  1620. size_t enc_length = 0;
  1621. if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
  1622. {
  1623. // iso-8859-1 (case-insensitive)
  1624. if (enc_length == 10
  1625. && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
  1626. && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
  1627. && enc[8] == '-' && enc[9] == '1')
  1628. return encoding_latin1;
  1629. // latin1 (case-insensitive)
  1630. if (enc_length == 6
  1631. && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
  1632. && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
  1633. && enc[5] == '1')
  1634. return encoding_latin1;
  1635. }
  1636. return encoding_utf8;
  1637. }
  1638. PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
  1639. {
  1640. // replace wchar encoding with utf implementation
  1641. if (encoding == encoding_wchar) return get_wchar_encoding();
  1642. // replace utf16 encoding with utf16 with specific endianness
  1643. if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  1644. // replace utf32 encoding with utf32 with specific endianness
  1645. if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  1646. // only do autodetection if no explicit encoding is requested
  1647. if (encoding != encoding_auto) return encoding;
  1648. // try to guess encoding (based on XML specification, Appendix F.1)
  1649. const uint8_t* data = static_cast<const uint8_t*>(contents);
  1650. return guess_buffer_encoding(data, size);
  1651. }
  1652. PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
  1653. {
  1654. size_t length = size / sizeof(char_t);
  1655. if (is_mutable)
  1656. {
  1657. out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
  1658. out_length = length;
  1659. }
  1660. else
  1661. {
  1662. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1663. if (!buffer) return false;
  1664. if (contents)
  1665. memcpy(buffer, contents, length * sizeof(char_t));
  1666. else
  1667. assert(length == 0);
  1668. buffer[length] = 0;
  1669. out_buffer = buffer;
  1670. out_length = length + 1;
  1671. }
  1672. return true;
  1673. }
  1674. #ifdef PUGIXML_WCHAR_MODE
  1675. PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
  1676. {
  1677. return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
  1678. (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
  1679. }
  1680. PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
  1681. {
  1682. const char_t* data = static_cast<const char_t*>(contents);
  1683. size_t length = size / sizeof(char_t);
  1684. if (is_mutable)
  1685. {
  1686. char_t* buffer = const_cast<char_t*>(data);
  1687. convert_wchar_endian_swap(buffer, data, length);
  1688. out_buffer = buffer;
  1689. out_length = length;
  1690. }
  1691. else
  1692. {
  1693. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1694. if (!buffer) return false;
  1695. convert_wchar_endian_swap(buffer, data, length);
  1696. buffer[length] = 0;
  1697. out_buffer = buffer;
  1698. out_length = length + 1;
  1699. }
  1700. return true;
  1701. }
  1702. template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
  1703. {
  1704. const typename D::type* data = static_cast<const typename D::type*>(contents);
  1705. size_t data_length = size / sizeof(typename D::type);
  1706. // first pass: get length in wchar_t units
  1707. size_t length = D::process(data, data_length, 0, wchar_counter());
  1708. // allocate buffer of suitable length
  1709. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1710. if (!buffer) return false;
  1711. // second pass: convert utf16 input to wchar_t
  1712. wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
  1713. wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
  1714. assert(oend == obegin + length);
  1715. *oend = 0;
  1716. out_buffer = buffer;
  1717. out_length = length + 1;
  1718. return true;
  1719. }
  1720. PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
  1721. {
  1722. // get native encoding
  1723. xml_encoding wchar_encoding = get_wchar_encoding();
  1724. // fast path: no conversion required
  1725. if (encoding == wchar_encoding)
  1726. return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
  1727. // only endian-swapping is required
  1728. if (need_endian_swap_utf(encoding, wchar_encoding))
  1729. return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
  1730. // source encoding is utf8
  1731. if (encoding == encoding_utf8)
  1732. return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
  1733. // source encoding is utf16
  1734. if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
  1735. {
  1736. xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  1737. return (native_encoding == encoding) ?
  1738. convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
  1739. convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
  1740. }
  1741. // source encoding is utf32
  1742. if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
  1743. {
  1744. xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  1745. return (native_encoding == encoding) ?
  1746. convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
  1747. convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
  1748. }
  1749. // source encoding is latin1
  1750. if (encoding == encoding_latin1)
  1751. return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
  1752. assert(false && "Invalid encoding"); // unreachable
  1753. return false;
  1754. }
  1755. #else
  1756. template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
  1757. {
  1758. const typename D::type* data = static_cast<const typename D::type*>(contents);
  1759. size_t data_length = size / sizeof(typename D::type);
  1760. // first pass: get length in utf8 units
  1761. size_t length = D::process(data, data_length, 0, utf8_counter());
  1762. // allocate buffer of suitable length
  1763. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1764. if (!buffer) return false;
  1765. // second pass: convert utf16 input to utf8
  1766. uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
  1767. uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
  1768. assert(oend == obegin + length);
  1769. *oend = 0;
  1770. out_buffer = buffer;
  1771. out_length = length + 1;
  1772. return true;
  1773. }
  1774. PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
  1775. {
  1776. for (size_t i = 0; i < size; ++i)
  1777. if (data[i] > 127)
  1778. return i;
  1779. return size;
  1780. }
  1781. PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
  1782. {
  1783. const uint8_t* data = static_cast<const uint8_t*>(contents);
  1784. size_t data_length = size;
  1785. // get size of prefix that does not need utf8 conversion
  1786. size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
  1787. assert(prefix_length <= data_length);
  1788. const uint8_t* postfix = data + prefix_length;
  1789. size_t postfix_length = data_length - prefix_length;
  1790. // if no conversion is needed, just return the original buffer
  1791. if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
  1792. // first pass: get length in utf8 units
  1793. size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
  1794. // allocate buffer of suitable length
  1795. char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  1796. if (!buffer) return false;
  1797. // second pass: convert latin1 input to utf8
  1798. memcpy(buffer, data, prefix_length);
  1799. uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
  1800. uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
  1801. assert(oend == obegin + length);
  1802. *oend = 0;
  1803. out_buffer = buffer;
  1804. out_length = length + 1;
  1805. return true;
  1806. }
  1807. PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
  1808. {
  1809. // fast path: no conversion required
  1810. if (encoding == encoding_utf8)
  1811. return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
  1812. // source encoding is utf16
  1813. if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
  1814. {
  1815. xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  1816. return (native_encoding == encoding) ?
  1817. convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
  1818. convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
  1819. }
  1820. // source encoding is utf32
  1821. if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
  1822. {
  1823. xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  1824. return (native_encoding == encoding) ?
  1825. convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
  1826. convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
  1827. }
  1828. // source encoding is latin1
  1829. if (encoding == encoding_latin1)
  1830. return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
  1831. assert(false && "Invalid encoding"); // unreachable
  1832. return false;
  1833. }
  1834. #endif
  1835. PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
  1836. {
  1837. // get length in utf8 characters
  1838. return wchar_decoder::process(str, length, 0, utf8_counter());
  1839. }
  1840. PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
  1841. {
  1842. // convert to utf8
  1843. uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
  1844. uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
  1845. assert(begin + size == end);
  1846. (void)!end;
  1847. (void)!size;
  1848. }
  1849. #ifndef PUGIXML_NO_STL
  1850. PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
  1851. {
  1852. // first pass: get length in utf8 characters
  1853. size_t size = as_utf8_begin(str, length);
  1854. // allocate resulting string
  1855. std::string result;
  1856. result.resize(size);
  1857. // second pass: convert to utf8
  1858. if (size > 0) as_utf8_end(&result[0], size, str, length);
  1859. return result;
  1860. }
  1861. PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
  1862. {
  1863. const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
  1864. // first pass: get length in wchar_t units
  1865. size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
  1866. // allocate resulting string
  1867. std::basic_string<wchar_t> result;
  1868. result.resize(length);
  1869. // second pass: convert to wchar_t
  1870. if (length > 0)
  1871. {
  1872. wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
  1873. wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
  1874. assert(begin + length == end);
  1875. (void)!end;
  1876. }
  1877. return result;
  1878. }
  1879. #endif
  1880. template <typename Header>
  1881. inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
  1882. {
  1883. // never reuse shared memory
  1884. if (header & xml_memory_page_contents_shared_mask) return false;
  1885. size_t target_length = strlength(target);
  1886. // always reuse document buffer memory if possible
  1887. if ((header & header_mask) == 0) return target_length >= length;
  1888. // reuse heap memory if waste is not too great
  1889. const size_t reuse_threshold = 32;
  1890. return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
  1891. }
  1892. template <typename String, typename Header>
  1893. PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
  1894. {
  1895. if (source_length == 0)
  1896. {
  1897. // empty string and null pointer are equivalent, so just deallocate old memory
  1898. xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
  1899. if (header & header_mask) alloc->deallocate_string(dest);
  1900. // mark the string as not allocated
  1901. dest = 0;
  1902. header &= ~header_mask;
  1903. return true;
  1904. }
  1905. else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
  1906. {
  1907. // we can reuse old buffer, so just copy the new data (including zero terminator)
  1908. memcpy(dest, source, source_length * sizeof(char_t));
  1909. dest[source_length] = 0;
  1910. return true;
  1911. }
  1912. else
  1913. {
  1914. xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
  1915. if (!alloc->reserve()) return false;
  1916. // allocate new buffer
  1917. char_t* buf = alloc->allocate_string(source_length + 1);
  1918. if (!buf) return false;
  1919. // copy the string (including zero terminator)
  1920. memcpy(buf, source, source_length * sizeof(char_t));
  1921. buf[source_length] = 0;
  1922. // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
  1923. if (header & header_mask) alloc->deallocate_string(dest);
  1924. // the string is now allocated, so set the flag
  1925. dest = buf;
  1926. header |= header_mask;
  1927. return true;
  1928. }
  1929. }
  1930. struct gap
  1931. {
  1932. char_t* end;
  1933. size_t size;
  1934. gap(): end(0), size(0)
  1935. {
  1936. }
  1937. // Push new gap, move s count bytes further (skipping the gap).
  1938. // Collapse previous gap.
  1939. void push(char_t*& s, size_t count)
  1940. {
  1941. if (end) // there was a gap already; collapse it
  1942. {
  1943. // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
  1944. assert(s >= end);
  1945. memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
  1946. }
  1947. s += count; // end of current gap
  1948. // "merge" two gaps
  1949. end = s;
  1950. size += count;
  1951. }
  1952. // Collapse all gaps, return past-the-end pointer
  1953. char_t* flush(char_t* s)
  1954. {
  1955. if (end)
  1956. {
  1957. // Move [old_gap_end, current_pos) to [old_gap_start, ...)
  1958. assert(s >= end);
  1959. memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
  1960. return s - size;
  1961. }
  1962. else return s;
  1963. }
  1964. };
// Decodes a single XML character or entity reference in place.
//
// s points at the character that starts the reference (the callers visible in this file
// invoke the function only when *s == '&'). Recognized forms are:
//   - numeric references: &#<decimal>; and &#x<hex>;
//   - the predefined entities &amp; &apos; &gt; &lt; &quot;
//
// When a reference is recognized, the decoded character is written starting at s and the
// rest of the reference text is registered with g as a gap to be removed.
// When it is not recognized, the buffer is left unmodified.
//
// Returns the position from which the caller should continue scanning.
PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
{
	// stre is the read cursor; s stays on the start of the reference and is used as the write cursor
	char_t* stre = s + 1;

	switch (*stre)
	{
		case '#': // &#...
		{
			// code point accumulated from the digits
			unsigned int ucsc = 0;

			if (stre[1] == 'x') // &#x... (hex code)
			{
				stre += 2;

				char_t ch = *stre;

				// "&#x;" has no digits: not a reference, leave the text as is
				if (ch == ';') return stre;

				for (;;)
				{
					// single-comparison range check: characters below '0' wrap to a large unsigned value
					if (static_cast<unsigned int>(ch - '0') <= 9)
						ucsc = 16 * ucsc + (ch - '0');
					// (ch | ' ') sets bit 0x20, folding 'A'..'F' onto 'a'..'f'
					else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
						ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
					else if (ch == ';')
						break;
					else // cancel
						return stre;

					ch = *++stre;
				}

				// step over ';'
				++stre;
			}
			else // &#... (dec code)
			{
				char_t ch = *++stre;

				// "&#;" has no digits: not a reference, leave the text as is
				if (ch == ';') return stre;

				for (;;)
				{
					if (static_cast<unsigned int>(ch - '0') <= 9)
						ucsc = 10 * ucsc + (ch - '0');
					else if (ch == ';')
						break;
					else // cancel
						return stre;

					ch = *++stre;
				}

				// step over ';'
				++stre;
			}

			// write the code point at s in the native encoding; the writer's return value becomes the new s
		#ifdef PUGIXML_WCHAR_MODE
			s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
		#else
			s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
		#endif

			// whatever remains of the reference text, [s, stre), is now a gap
			g.push(s, stre - s);
			return stre;
		}

		case 'a': // &a
		{
			++stre;

			if (*stre == 'm') // &am
			{
				if (*++stre == 'p' && *++stre == ';') // &amp;
				{
					*s++ = '&';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
			}
			else if (*stre == 'p') // &ap
			{
				if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
				{
					*s++ = '\'';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
			}
			// not a recognized entity: fall out of the switch with the buffer untouched
			break;
		}

		case 'g': // &g
		{
			if (*++stre == 't' && *++stre == ';') // &gt;
			{
				*s++ = '>';
				++stre;

				g.push(s, stre - s);
				return stre;
			}
			break;
		}

		case 'l': // &l
		{
			if (*++stre == 't' && *++stre == ';') // &lt;
			{
				*s++ = '<';
				++stre;

				g.push(s, stre - s);
				return stre;
			}
			break;
		}

		case 'q': // &q
		{
			if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
			{
				*s++ = '"';
				++stre;

				g.push(s, stre - s);
				return stre;
			}
			break;
		}

		default:
			break;
	}

	// unrecognized reference: resume scanning where matching stopped
	return stre;
}
// Parser utilities
// These macros expand in place and refer to identifiers of the enclosing function by name:
// s (current position), endch, optmsk, cursor, alloc, ch, error_offset and error_status.

// True if c equals e, or if c is the terminating zero and the saved character endch equals e
#define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
// Advances s past characters classified as ct_space
#define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
// Non-zero if any of the option bits OPT is set in optmsk
#define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
// Appends a new child of the given type to cursor and descends into it; reports status_out_of_memory on failure
#define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
// Moves cursor back up to its parent
#define PUGI__POPNODE() { cursor = cursor->parent; }
// Advances s until condition X holds or the terminating zero is reached
#define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
// Advances s while condition X holds (X itself must stop at the terminating zero)
#define PUGI__SCANWHILE(X) { while (X) ++s; }
// Same effect as advancing s while X holds, but tests four characters per loop iteration;
// X must be written in terms of ss, the character currently under test
#define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
// Saves the character at s into ch, overwrites it with a zero terminator and steps past it
#define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
// Records the error position m and status err, then returns a null pointer from the enclosing function
#define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
// Reports error err at m if s has reached the terminating zero
#define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
  2091. PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
  2092. {
  2093. gap g;
  2094. while (true)
  2095. {
  2096. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
  2097. if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
  2098. {
  2099. *s++ = '\n'; // replace first one with 0x0a
  2100. if (*s == '\n') g.push(s, 1);
  2101. }
  2102. else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
  2103. {
  2104. *g.flush(s) = 0;
  2105. return s + (s[2] == '>' ? 3 : 2);
  2106. }
  2107. else if (*s == 0)
  2108. {
  2109. return 0;
  2110. }
  2111. else ++s;
  2112. }
  2113. }
  2114. PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
  2115. {
  2116. gap g;
  2117. while (true)
  2118. {
  2119. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
  2120. if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
  2121. {
  2122. *s++ = '\n'; // replace first one with 0x0a
  2123. if (*s == '\n') g.push(s, 1);
  2124. }
  2125. else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
  2126. {
  2127. *g.flush(s) = 0;
  2128. return s + 1;
  2129. }
  2130. else if (*s == 0)
  2131. {
  2132. return 0;
  2133. }
  2134. else ++s;
  2135. }
  2136. }
  2137. typedef char_t* (*strconv_pcdata_t)(char_t*);
  2138. template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
  2139. {
  2140. static char_t* parse(char_t* s)
  2141. {
  2142. gap g;
  2143. char_t* begin = s;
  2144. while (true)
  2145. {
  2146. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
  2147. if (*s == '<') // PCDATA ends here
  2148. {
  2149. char_t* end = g.flush(s);
  2150. if (opt_trim::value)
  2151. while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
  2152. --end;
  2153. *end = 0;
  2154. return s + 1;
  2155. }
  2156. else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
  2157. {
  2158. *s++ = '\n'; // replace first one with 0x0a
  2159. if (*s == '\n') g.push(s, 1);
  2160. }
  2161. else if (opt_escape::value && *s == '&')
  2162. {
  2163. s = strconv_escape(s, g);
  2164. }
  2165. else if (*s == 0)
  2166. {
  2167. char_t* end = g.flush(s);
  2168. if (opt_trim::value)
  2169. while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
  2170. --end;
  2171. *end = 0;
  2172. return s;
  2173. }
  2174. else ++s;
  2175. }
  2176. }
  2177. };
  2178. PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
  2179. {
  2180. PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
  2181. switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above
  2182. {
  2183. case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
  2184. case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
  2185. case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
  2186. case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
  2187. case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
  2188. case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
  2189. case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
  2190. case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
  2191. default: assert(false); return 0; // unreachable
  2192. }
  2193. }
  2194. typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
// In-place attribute value converters. All four variants share one contract:
//   s         - first character of the value (just past the opening quote)
//   end_quote - quote character that terminates the value
// On success the value is zero-terminated in place and the position just past the closing
// quote is returned; 0 is returned if the buffer ends before the closing quote.
// opt_escape selects at compile time whether character/entity references are decoded.
template <typename opt_escape> struct strconv_attribute_impl
{
	// Whitespace normalization: leading and trailing whitespace is removed and every
	// run of whitespace inside the value becomes a single space.
	static char_t* parse_wnorm(char_t* s, char_t end_quote)
	{
		gap g;

		// trim leading whitespaces
		if (PUGI__IS_CHARTYPE(*s, ct_space))
		{
			char_t* str = s;

			do ++str;
			while (PUGI__IS_CHARTYPE(*str, ct_space));

			g.push(s, str - s);
		}

		while (true)
		{
			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));

			if (*s == end_quote)
			{
				char_t* str = g.flush(s);

				// write the terminator, then keep moving it backwards over trailing whitespace
				do *str-- = 0;
				while (PUGI__IS_CHARTYPE(*str, ct_space));

				return s + 1;
			}
			else if (PUGI__IS_CHARTYPE(*s, ct_space))
			{
				// first whitespace character of a run becomes a space...
				*s++ = ' ';

				// ...and the rest of the run is removed
				if (PUGI__IS_CHARTYPE(*s, ct_space))
				{
					char_t* str = s + 1;
					while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;

					g.push(s, str - s);
				}
			}
			else if (opt_escape::value && *s == '&')
			{
				s = strconv_escape(s, g);
			}
			else if (!*s)
			{
				// buffer ended before the closing quote
				return 0;
			}
			else ++s;
		}
	}

	// Whitespace conversion: every whitespace character becomes a space; a "\r\n" pair
	// becomes a single space.
	static char_t* parse_wconv(char_t* s, char_t end_quote)
	{
		gap g;

		while (true)
		{
			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));

			if (*s == end_quote)
			{
				*g.flush(s) = 0;

				return s + 1;
			}
			else if (PUGI__IS_CHARTYPE(*s, ct_space))
			{
				if (*s == '\r')
				{
					*s++ = ' ';

					// drop the '\n' of a "\r\n" pair
					if (*s == '\n') g.push(s, 1);
				}
				else *s++ = ' ';
			}
			else if (opt_escape::value && *s == '&')
			{
				s = strconv_escape(s, g);
			}
			else if (!*s)
			{
				// buffer ended before the closing quote
				return 0;
			}
			else ++s;
		}
	}

	// End-of-line normalization only: "\r\n" and a lone '\r' both become '\n'.
	static char_t* parse_eol(char_t* s, char_t end_quote)
	{
		gap g;

		while (true)
		{
			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));

			if (*s == end_quote)
			{
				*g.flush(s) = 0;

				return s + 1;
			}
			else if (*s == '\r')
			{
				*s++ = '\n';

				// drop the '\n' of a "\r\n" pair
				if (*s == '\n') g.push(s, 1);
			}
			else if (opt_escape::value && *s == '&')
			{
				s = strconv_escape(s, g);
			}
			else if (!*s)
			{
				// buffer ended before the closing quote
				return 0;
			}
			else ++s;
		}
	}

	// No whitespace or end-of-line processing; only reference decoding (if enabled).
	static char_t* parse_simple(char_t* s, char_t end_quote)
	{
		gap g;

		while (true)
		{
			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));

			if (*s == end_quote)
			{
				*g.flush(s) = 0;

				return s + 1;
			}
			else if (opt_escape::value && *s == '&')
			{
				s = strconv_escape(s, g);
			}
			else if (!*s)
			{
				// buffer ended before the closing quote
				return 0;
			}
			else ++s;
		}
	}
};
  2320. PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
  2321. {
  2322. PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
  2323. switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above
  2324. {
  2325. case 0: return strconv_attribute_impl<opt_false>::parse_simple;
  2326. case 1: return strconv_attribute_impl<opt_true>::parse_simple;
  2327. case 2: return strconv_attribute_impl<opt_false>::parse_eol;
  2328. case 3: return strconv_attribute_impl<opt_true>::parse_eol;
  2329. case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
  2330. case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
  2331. case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
  2332. case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
  2333. case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
  2334. case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
  2335. case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
  2336. case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
  2337. case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
  2338. case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
  2339. case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
  2340. case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
  2341. default: assert(false); return 0; // unreachable
  2342. }
  2343. }
  2344. inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
  2345. {
  2346. xml_parse_result result;
  2347. result.status = status;
  2348. result.offset = offset;
  2349. return result;
  2350. }
  2351. struct xml_parser
  2352. {
	// allocator handed to append_new_node / append_new_attribute for everything created while parsing
	xml_allocator* alloc;

	// written by PUGI__THROW_ERROR: where the error was detected and what it was;
	// they keep their initial values (0 / status_ok) when no error is reported
	char_t* error_offset;
	xml_parse_status error_status;

	// Creates a parser in the "no error" state that allocates through alloc_.
	xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
	{
	}
  2359. // DOCTYPE consists of nested sections of the following possible types:
  2360. // <!-- ... -->, <? ... ?>, "...", '...'
  2361. // <![...]]>
  2362. // <!...>
  2363. // First group can not contain nested groups
  2364. // Second group can contain nested groups of the same type
  2365. // Third group can contain all other groups
  2366. char_t* parse_doctype_primitive(char_t* s)
  2367. {
  2368. if (*s == '"' || *s == '\'')
  2369. {
  2370. // quoted string
  2371. char_t ch = *s++;
  2372. PUGI__SCANFOR(*s == ch);
  2373. if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
  2374. s++;
  2375. }
  2376. else if (s[0] == '<' && s[1] == '?')
  2377. {
  2378. // <? ... ?>
  2379. s += 2;
  2380. PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
  2381. if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
  2382. s += 2;
  2383. }
  2384. else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
  2385. {
  2386. s += 4;
  2387. PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
  2388. if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
  2389. s += 3;
  2390. }
  2391. else PUGI__THROW_ERROR(status_bad_doctype, s);
  2392. return s;
  2393. }
  2394. char_t* parse_doctype_ignore(char_t* s)
  2395. {
  2396. size_t depth = 0;
  2397. assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
  2398. s += 3;
  2399. while (*s)
  2400. {
  2401. if (s[0] == '<' && s[1] == '!' && s[2] == '[')
  2402. {
  2403. // nested ignore section
  2404. s += 3;
  2405. depth++;
  2406. }
  2407. else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
  2408. {
  2409. // ignore section end
  2410. s += 3;
  2411. if (depth == 0)
  2412. return s;
  2413. depth--;
  2414. }
  2415. else s++;
  2416. }
  2417. PUGI__THROW_ERROR(status_bad_doctype, s);
  2418. }
  2419. char_t* parse_doctype_group(char_t* s, char_t endch)
  2420. {
  2421. size_t depth = 0;
  2422. assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
  2423. s += 2;
  2424. while (*s)
  2425. {
  2426. if (s[0] == '<' && s[1] == '!' && s[2] != '-')
  2427. {
  2428. if (s[2] == '[')
  2429. {
  2430. // ignore
  2431. s = parse_doctype_ignore(s);
  2432. if (!s) return s;
  2433. }
  2434. else
  2435. {
  2436. // some control group
  2437. s += 2;
  2438. depth++;
  2439. }
  2440. }
  2441. else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
  2442. {
  2443. // unknown tag (forbidden), or some primitive group
  2444. s = parse_doctype_primitive(s);
  2445. if (!s) return s;
  2446. }
  2447. else if (*s == '>')
  2448. {
  2449. if (depth == 0)
  2450. return s;
  2451. depth--;
  2452. s++;
  2453. }
  2454. else s++;
  2455. }
  2456. if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
  2457. return s;
  2458. }
	// Parses a construct that begins with "<!": a comment, a CDATA section or a DOCTYPE.
	//   s      - points at the '!' (the preceding '<' has already been consumed)
	//   cursor - node to attach a new node to; taken by value, so nodes pushed here do not
	//            change the caller's cursor
	//   optmsk - parse options; decide whether a node is created or the construct is only skipped
	//   endch  - character that originally occupied the buffer's final position
	// Returns the position just past the construct, or 0 after recording an error.
	char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
	{
		// parse node contents, starting with exclamation mark
		++s;

		if (*s == '-') // '<!-...'
		{
			++s;

			if (*s == '-') // '<!--...'
			{
				++s;

				if (PUGI__OPTSET(parse_comments))
				{
					PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
					cursor->value = s; // Save the offset.
				}

				// in-place end-of-line conversion is only needed when the comment text is kept
				if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
				{
					s = strconv_comment(s, endch);

					if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
				}
				else
				{
					// Scan for terminating '-->'.
					PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
					PUGI__CHECK_ERROR(status_bad_comment, s);

					if (PUGI__OPTSET(parse_comments))
						*s = 0; // Zero-terminate this segment at the first terminating '-'.

					s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
				}
			}
			else PUGI__THROW_ERROR(status_bad_comment, s);
		}
		else if (*s == '[')
		{
			// '<![CDATA[...'
			if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
			{
				++s;

				if (PUGI__OPTSET(parse_cdata))
				{
					PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
					cursor->value = s; // Save the offset.

					if (PUGI__OPTSET(parse_eol))
					{
						s = strconv_cdata(s, endch);

						if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
					}
					else
					{
						// Scan for terminating ']]>'.
						PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
						PUGI__CHECK_ERROR(status_bad_cdata, s);

						*s++ = 0; // Zero-terminate this segment.
					}
				}
				else // Flagged for discard, but we still have to scan for the terminator.
				{
					// Scan for terminating ']]>'.
					PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
					PUGI__CHECK_ERROR(status_bad_cdata, s);

					++s;
				}

				// all three paths above leave s on the second ']' of the terminator
				s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
			}
			else PUGI__THROW_ERROR(status_bad_cdata, s);
		}
		else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
		{
			// back up to the '<' so that the group parser sees the whole "<!DOCTYPE"
			s -= 2;

			// DOCTYPE is only accepted directly under the root of the parse
			if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);

			// first character after "<!DOCTYPE" (9 characters)
			char_t* mark = s + 9;

			s = parse_doctype_group(s, endch);
			if (!s) return s;

			assert((*s == 0 && endch == '>') || *s == '>');
			// turn the closing '>' into a terminator for the doctype text and step past it
			if (*s) *s++ = 0;

			if (PUGI__OPTSET(parse_doctype))
			{
				// the stored value starts at the first non-whitespace character after "<!DOCTYPE"
				while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;

				PUGI__PUSHNODE(node_doctype);

				cursor->value = mark;
			}
		}
		// the buffer ended right after "<!": use the stripped final character to pick the most specific error
		else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
		else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
		else PUGI__THROW_ERROR(status_unrecognized_tag, s);

		return s;
	}
	// Parses a construct that begins with "<?": a processing instruction or the XML declaration.
	//   s          - points at the '?' (the preceding '<' has already been consumed)
	//   ref_cursor - current node; updated on exit. For a declaration with attributes it is
	//                left pointing at the new node_declaration so that the caller continues
	//                with attribute parsing.
	//   optmsk     - parse options; decide whether a node is created or the construct is only skipped
	//   endch      - character that originally occupied the buffer's final position
	// Returns the position at which parsing should continue, or 0 after recording an error.
	char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
	{
		// load into registers
		xml_node_struct* cursor = ref_cursor;
		char_t ch = 0;

		// parse node contents, starting with question mark
		++s;

		// read PI target
		char_t* target = s;

		if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);

		PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
		PUGI__CHECK_ERROR(status_bad_pi, s);

		// determine node type; stricmp / strcasecmp is not portable
		// (c | ' ') sets bit 0x20, so the target is compared against "xml" ignoring ASCII case;
		// target + 3 == s ensures the target is exactly three characters long
		bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;

		if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
		{
			if (declaration)
			{
				// disallow non top-level declarations
				if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);

				PUGI__PUSHNODE(node_declaration);
			}
			else
			{
				PUGI__PUSHNODE(node_pi);
			}

			cursor->name = target;

			// terminate the target name; ch receives the character that followed it
			PUGI__ENDSEG();

			// parse value/attributes
			if (ch == '?')
			{
				// empty node
				if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
				// step over '>' unless it was the stripped final character of the buffer
				s += (*s == '>');

				PUGI__POPNODE();
			}
			else if (PUGI__IS_CHARTYPE(ch, ct_space))
			{
				PUGI__SKIPWS();

				// scan for tag end
				char_t* value = s;

				PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
				PUGI__CHECK_ERROR(status_bad_pi, s);

				if (declaration)
				{
					// replace ending ? with / so that 'element' terminates properly
					*s = '/';

					// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
					s = value;
				}
				else
				{
					// store value and step over >
					cursor->value = value;

					PUGI__POPNODE();

					// terminate the value at the '?'
					PUGI__ENDSEG();

					s += (*s == '>');
				}
			}
			else PUGI__THROW_ERROR(status_bad_pi, s);
		}
		else
		{
			// the node is not wanted: skip to the end of the construct without creating anything
			// scan for tag end
			PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
			PUGI__CHECK_ERROR(status_bad_pi, s);

			s += (s[1] == '>' ? 2 : 1);
		}

		// store from registers
		ref_cursor = cursor;

		return s;
	}
	// Main parsing loop: walks the zero-terminated buffer starting at s and builds the node
	// tree under root, storing names and values as pointers into the buffer (which is
	// modified in place to zero-terminate them).
	//   optmsk - parse options; also select the attribute and PCDATA converters used
	//   endch  - character that originally occupied the buffer's final position (the caller
	//            replaced it with the terminating zero), consulted wherever the buffer end
	//            may stand for a real character such as '>'
	// Returns the position where parsing stopped, or 0 after recording an error in
	// error_status / error_offset.
	char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
	{
		// pick the converter specializations once, up front
		strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
		strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);

		char_t ch = 0;
		xml_node_struct* cursor = root;
		char_t* mark = s;

		while (*s != 0)
		{
			if (*s == '<')
			{
				++s;

			// also entered via goto from the PCDATA branch below, with s just past a '<'
			LOC_TAG:
				if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
				{
					PUGI__PUSHNODE(node_element); // Append a new node to the tree.

					cursor->name = s;

					PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
					PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.

					if (ch == '>')
					{
						// end of tag
					}
					else if (PUGI__IS_CHARTYPE(ch, ct_space))
					{
					// also entered via goto after parse_question created a declaration node
					LOC_ATTRIBUTES:
						while (true)
						{
							PUGI__SKIPWS(); // Eat any whitespace.

							if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
							{
								xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
								if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);

								a->name = s; // Save the offset.

								PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
								PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.

								// whitespace between the name and '=': skip it and re-read ch
								if (PUGI__IS_CHARTYPE(ch, ct_space))
								{
									PUGI__SKIPWS(); // Eat any whitespace.

									ch = *s;
									++s;
								}

								if (ch == '=') // '<... #=...'
								{
									PUGI__SKIPWS(); // Eat any whitespace.

									if (*s == '"' || *s == '\'') // '<... #="...'
									{
										ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
										++s; // Step over the quote.
										a->value = s; // Save the offset.

										s = strconv_attribute(s, ch);

										if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);

										// After this line the loop continues from the start;
										// Whitespaces, / and > are ok, symbols and EOF are wrong,
										// everything else will be detected
										if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
									}
									else PUGI__THROW_ERROR(status_bad_attribute, s);
								}
								else PUGI__THROW_ERROR(status_bad_attribute, s);
							}
							else if (*s == '/')
							{
								++s;

								if (*s == '>')
								{
									// self-closing tag: the element has no content
									PUGI__POPNODE();
									s++;
									break;
								}
								else if (*s == 0 && endch == '>')
								{
									// same, with the '>' being the stripped final character of the buffer
									PUGI__POPNODE();
									break;
								}
								else PUGI__THROW_ERROR(status_bad_start_element, s);
							}
							else if (*s == '>')
							{
								++s;

								break;
							}
							else if (*s == 0 && endch == '>')
							{
								break;
							}
							else PUGI__THROW_ERROR(status_bad_start_element, s);
						}

						// the attribute loop is left only at the end of the start tag
					}
					else if (ch == '/') // '<#.../'
					{
						if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);

						PUGI__POPNODE(); // Pop.

						s += (*s == '>');
					}
					else if (ch == 0)
					{
						// we stepped over null terminator, backtrack & handle closing tag
						--s;

						if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
					}
					else PUGI__THROW_ERROR(status_bad_start_element, s);
				}
				else if (*s == '/')
				{
					++s;

					mark = s;

					// compare the closing tag's name against the name of the element being closed
					char_t* name = cursor->name;
					if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);

					while (PUGI__IS_CHARTYPE(*s, ct_symbol))
					{
						if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
					}

					// the closing name ended before the element name did
					if (*name)
					{
						// only the stripped final character is missing: report a truncated end tag rather than a mismatch
						if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
						else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
					}

					PUGI__POPNODE(); // Pop.

					PUGI__SKIPWS();

					if (*s == 0)
					{
						if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
					}
					else
					{
						if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
						++s;
					}
				}
				else if (*s == '?') // '<?...'
				{
					s = parse_question(s, cursor, optmsk, endch);
					if (!s) return s;

					assert(cursor);
					// a declaration node left as cursor means its attributes still have to be parsed
					if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
				}
				else if (*s == '!') // '<!...'
				{
					s = parse_exclamation(s, cursor, optmsk, endch);
					if (!s) return s;
				}
				else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
				else PUGI__THROW_ERROR(status_unrecognized_tag, s);
			}
			else
			{
				mark = s; // Save this offset while searching for a terminator.

				PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.

				// the text up to the next tag (or the end of the buffer) is whitespace only
				if (*s == '<' || !*s)
				{
					// We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
					assert(mark != s);

					if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
					{
						continue;
					}
					else if (PUGI__OPTSET(parse_ws_pcdata_single))
					{
						// keep whitespace-only text only when it is the sole content before a closing tag
						if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
					}
				}

				// unless trimming is requested, the leading whitespace belongs to the text
				if (!PUGI__OPTSET(parse_trim_pcdata))
					s = mark;

				// text directly under the root is only kept in fragment mode
				if (cursor->parent || PUGI__OPTSET(parse_fragment))
				{
					if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
					{
						// store the text in the element itself instead of a separate child node
						cursor->value = s; // Save the offset.
					}
					else
					{
						PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.

						cursor->value = s; // Save the offset.

						PUGI__POPNODE(); // Pop since this is a standalone.
					}

					s = strconv_pcdata(s);

					// the converter stopped at the end of the buffer rather than at a '<'
					if (!*s) break;
				}
				else
				{
					PUGI__SCANFOR(*s == '<'); // '...<'
					if (!*s) break;

					++s;
				}

				// We're after '<'
				goto LOC_TAG;
			}
		}

		// check that last tag is closed
		if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);

		return s;
	}
  2812. #ifdef PUGIXML_WCHAR_MODE
  2813. static char_t* parse_skip_bom(char_t* s)
  2814. {
  2815. unsigned int bom = 0xfeff;
  2816. return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
  2817. }
  2818. #else
  2819. static char_t* parse_skip_bom(char_t* s)
  2820. {
  2821. return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
  2822. }
  2823. #endif
  2824. static bool has_element_node_siblings(xml_node_struct* node)
  2825. {
  2826. while (node)
  2827. {
  2828. if (PUGI__NODETYPE(node) == node_element) return true;
  2829. node = node->next_sibling;
  2830. }
  2831. return false;
  2832. }
  2833. static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
  2834. {
  2835. // early-out for empty documents
  2836. if (length == 0)
  2837. return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
  2838. // get last child of the root before parsing
  2839. xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
  2840. // create parser on stack
  2841. xml_parser parser(static_cast<xml_allocator*>(xmldoc));
  2842. // save last character and make buffer zero-terminated (speeds up parsing)
  2843. char_t endch = buffer[length - 1];
  2844. buffer[length - 1] = 0;
  2845. // skip BOM to make sure it does not end up as part of parse output
  2846. char_t* buffer_data = parse_skip_bom(buffer);
  2847. // perform actual parsing
  2848. parser.parse_tree(buffer_data, root, optmsk, endch);
  2849. xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
  2850. assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
  2851. if (result)
  2852. {
  2853. // since we removed last character, we have to handle the only possible false positive (stray <)
  2854. if (endch == '<')
  2855. return make_parse_result(status_unrecognized_tag, length - 1);
  2856. // check if there are any element nodes parsed
  2857. xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
  2858. if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
  2859. return make_parse_result(status_no_document_element, length - 1);
  2860. }
  2861. else
  2862. {
  2863. // roll back offset if it occurs on a null terminator in the source buffer
  2864. if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
  2865. result.offset--;
  2866. }
  2867. return result;
  2868. }
  2869. };
  2870. // Output facilities
  2871. PUGI__FN xml_encoding get_write_native_encoding()
  2872. {
  2873. #ifdef PUGIXML_WCHAR_MODE
  2874. return get_wchar_encoding();
  2875. #else
  2876. return encoding_utf8;
  2877. #endif
  2878. }
  2879. PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
  2880. {
  2881. // replace wchar encoding with utf implementation
  2882. if (encoding == encoding_wchar) return get_wchar_encoding();
  2883. // replace utf16 encoding with utf16 with specific endianness
  2884. if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  2885. // replace utf32 encoding with utf32 with specific endianness
  2886. if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  2887. // only do autodetection if no explicit encoding is requested
  2888. if (encoding != encoding_auto) return encoding;
  2889. // assume utf8 encoding
  2890. return encoding_utf8;
  2891. }
  2892. template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
  2893. {
  2894. PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
  2895. typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
  2896. return static_cast<size_t>(end - dest) * sizeof(*dest);
  2897. }
  2898. template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
  2899. {
  2900. PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
  2901. typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
  2902. if (opt_swap)
  2903. {
  2904. for (typename T::value_type i = dest; i != end; ++i)
  2905. *i = endian_swap(*i);
  2906. }
  2907. return static_cast<size_t>(end - dest) * sizeof(*dest);
  2908. }
  2909. #ifdef PUGIXML_WCHAR_MODE
  2910. PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
  2911. {
  2912. if (length < 1) return 0;
  2913. // discard last character if it's the lead of a surrogate pair
  2914. return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
  2915. }
  2916. PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
  2917. {
  2918. // only endian-swapping is required
  2919. if (need_endian_swap_utf(encoding, get_wchar_encoding()))
  2920. {
  2921. convert_wchar_endian_swap(r_char, data, length);
  2922. return length * sizeof(char_t);
  2923. }
  2924. // convert to utf8
  2925. if (encoding == encoding_utf8)
  2926. return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
  2927. // convert to utf16
  2928. if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
  2929. {
  2930. xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  2931. return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
  2932. }
  2933. // convert to utf32
  2934. if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
  2935. {
  2936. xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  2937. return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
  2938. }
  2939. // convert to latin1
  2940. if (encoding == encoding_latin1)
  2941. return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
  2942. assert(false && "Invalid encoding"); // unreachable
  2943. return 0;
  2944. }
  2945. #else
  2946. PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
  2947. {
  2948. if (length < 5) return 0;
  2949. for (size_t i = 1; i <= 4; ++i)
  2950. {
  2951. uint8_t ch = static_cast<uint8_t>(data[length - i]);
  2952. // either a standalone character or a leading one
  2953. if ((ch & 0xc0) != 0x80) return length - i;
  2954. }
  2955. // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
  2956. return length;
  2957. }
  2958. PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
  2959. {
  2960. if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
  2961. {
  2962. xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
  2963. return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
  2964. }
  2965. if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
  2966. {
  2967. xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
  2968. return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
  2969. }
  2970. if (encoding == encoding_latin1)
  2971. return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
  2972. assert(false && "Invalid encoding"); // unreachable
  2973. return 0;
  2974. }
  2975. #endif
  2976. class xml_buffered_writer
  2977. {
  2978. xml_buffered_writer(const xml_buffered_writer&);
  2979. xml_buffered_writer& operator=(const xml_buffered_writer&);
  2980. public:
  2981. xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
  2982. {
  2983. PUGI__STATIC_ASSERT(bufcapacity >= 8);
  2984. }
  2985. size_t flush()
  2986. {
  2987. flush(buffer, bufsize);
  2988. bufsize = 0;
  2989. return 0;
  2990. }
  2991. void flush(const char_t* data, size_t size)
  2992. {
  2993. if (size == 0) return;
  2994. // fast path, just write data
  2995. if (encoding == get_write_native_encoding())
  2996. writer.write(data, size * sizeof(char_t));
  2997. else
  2998. {
  2999. // convert chunk
  3000. size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
  3001. assert(result <= sizeof(scratch));
  3002. // write data
  3003. writer.write(scratch.data_u8, result);
  3004. }
  3005. }
  3006. void write_direct(const char_t* data, size_t length)
  3007. {
  3008. // flush the remaining buffer contents
  3009. flush();
  3010. // handle large chunks
  3011. if (length > bufcapacity)
  3012. {
  3013. if (encoding == get_write_native_encoding())
  3014. {
  3015. // fast path, can just write data chunk
  3016. writer.write(data, length * sizeof(char_t));
  3017. return;
  3018. }
  3019. // need to convert in suitable chunks
  3020. while (length > bufcapacity)
  3021. {
  3022. // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
  3023. // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
  3024. size_t chunk_size = get_valid_length(data, bufcapacity);
  3025. assert(chunk_size);
  3026. // convert chunk and write
  3027. flush(data, chunk_size);
  3028. // iterate
  3029. data += chunk_size;
  3030. length -= chunk_size;
  3031. }
  3032. // small tail is copied below
  3033. bufsize = 0;
  3034. }
  3035. memcpy(buffer + bufsize, data, length * sizeof(char_t));
  3036. bufsize += length;
  3037. }
  3038. void write_buffer(const char_t* data, size_t length)
  3039. {
  3040. size_t offset = bufsize;
  3041. if (offset + length <= bufcapacity)
  3042. {
  3043. memcpy(buffer + offset, data, length * sizeof(char_t));
  3044. bufsize = offset + length;
  3045. }
  3046. else
  3047. {
  3048. write_direct(data, length);
  3049. }
  3050. }
  3051. void write_string(const char_t* data)
  3052. {
  3053. // write the part of the string that fits in the buffer
  3054. size_t offset = bufsize;
  3055. while (*data && offset < bufcapacity)
  3056. buffer[offset++] = *data++;
  3057. // write the rest
  3058. if (offset < bufcapacity)
  3059. {
  3060. bufsize = offset;
  3061. }
  3062. else
  3063. {
  3064. // backtrack a bit if we have split the codepoint
  3065. size_t length = offset - bufsize;
  3066. size_t extra = length - get_valid_length(data - length, length);
  3067. bufsize = offset - extra;
  3068. write_direct(data - extra, strlength(data) + extra);
  3069. }
  3070. }
  3071. void write(char_t d0)
  3072. {
  3073. size_t offset = bufsize;
  3074. if (offset > bufcapacity - 1) offset = flush();
  3075. buffer[offset + 0] = d0;
  3076. bufsize = offset + 1;
  3077. }
  3078. void write(char_t d0, char_t d1)
  3079. {
  3080. size_t offset = bufsize;
  3081. if (offset > bufcapacity - 2) offset = flush();
  3082. buffer[offset + 0] = d0;
  3083. buffer[offset + 1] = d1;
  3084. bufsize = offset + 2;
  3085. }
  3086. void write(char_t d0, char_t d1, char_t d2)
  3087. {
  3088. size_t offset = bufsize;
  3089. if (offset > bufcapacity - 3) offset = flush();
  3090. buffer[offset + 0] = d0;
  3091. buffer[offset + 1] = d1;
  3092. buffer[offset + 2] = d2;
  3093. bufsize = offset + 3;
  3094. }
  3095. void write(char_t d0, char_t d1, char_t d2, char_t d3)
  3096. {
  3097. size_t offset = bufsize;
  3098. if (offset > bufcapacity - 4) offset = flush();
  3099. buffer[offset + 0] = d0;
  3100. buffer[offset + 1] = d1;
  3101. buffer[offset + 2] = d2;
  3102. buffer[offset + 3] = d3;
  3103. bufsize = offset + 4;
  3104. }
  3105. void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
  3106. {
  3107. size_t offset = bufsize;
  3108. if (offset > bufcapacity - 5) offset = flush();
  3109. buffer[offset + 0] = d0;
  3110. buffer[offset + 1] = d1;
  3111. buffer[offset + 2] = d2;
  3112. buffer[offset + 3] = d3;
  3113. buffer[offset + 4] = d4;
  3114. bufsize = offset + 5;
  3115. }
  3116. void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
  3117. {
  3118. size_t offset = bufsize;
  3119. if (offset > bufcapacity - 6) offset = flush();
  3120. buffer[offset + 0] = d0;
  3121. buffer[offset + 1] = d1;
  3122. buffer[offset + 2] = d2;
  3123. buffer[offset + 3] = d3;
  3124. buffer[offset + 4] = d4;
  3125. buffer[offset + 5] = d5;
  3126. bufsize = offset + 6;
  3127. }
  3128. // utf8 maximum expansion: x4 (-> utf32)
  3129. // utf16 maximum expansion: x2 (-> utf32)
  3130. // utf32 maximum expansion: x1
  3131. enum
  3132. {
  3133. bufcapacitybytes =
  3134. #ifdef PUGIXML_MEMORY_OUTPUT_STACK
  3135. PUGIXML_MEMORY_OUTPUT_STACK
  3136. #else
  3137. 10240
  3138. #endif
  3139. ,
  3140. bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
  3141. };
  3142. char_t buffer[bufcapacity];
  3143. union
  3144. {
  3145. uint8_t data_u8[4 * bufcapacity];
  3146. uint16_t data_u16[2 * bufcapacity];
  3147. uint32_t data_u32[bufcapacity];
  3148. char_t data_char[bufcapacity];
  3149. } scratch;
  3150. xml_writer& writer;
  3151. size_t bufsize;
  3152. xml_encoding encoding;
  3153. };
  3154. PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
  3155. {
  3156. while (*s)
  3157. {
  3158. const char_t* prev = s;
  3159. // While *s is a usual symbol
  3160. PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
  3161. writer.write_buffer(prev, static_cast<size_t>(s - prev));
  3162. switch (*s)
  3163. {
  3164. case 0: break;
  3165. case '&':
  3166. writer.write('&', 'a', 'm', 'p', ';');
  3167. ++s;
  3168. break;
  3169. case '<':
  3170. writer.write('&', 'l', 't', ';');
  3171. ++s;
  3172. break;
  3173. case '>':
  3174. writer.write('&', 'g', 't', ';');
  3175. ++s;
  3176. break;
  3177. case '"':
  3178. if (flags & format_attribute_single_quote)
  3179. writer.write('"');
  3180. else
  3181. writer.write('&', 'q', 'u', 'o', 't', ';');
  3182. ++s;
  3183. break;
  3184. case '\'':
  3185. if (flags & format_attribute_single_quote)
  3186. writer.write('&', 'a', 'p', 'o', 's', ';');
  3187. else
  3188. writer.write('\'');
  3189. ++s;
  3190. break;
  3191. default: // s is not a usual symbol
  3192. {
  3193. unsigned int ch = static_cast<unsigned int>(*s++);
  3194. assert(ch < 32);
  3195. if (!(flags & format_skip_control_chars))
  3196. writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
  3197. }
  3198. }
  3199. }
  3200. }
  3201. PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
  3202. {
  3203. if (flags & format_no_escapes)
  3204. writer.write_string(s);
  3205. else
  3206. text_output_escaped(writer, s, type, flags);
  3207. }
  3208. PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
  3209. {
  3210. do
  3211. {
  3212. writer.write('<', '!', '[', 'C', 'D');
  3213. writer.write('A', 'T', 'A', '[');
  3214. const char_t* prev = s;
  3215. // look for ]]> sequence - we can't output it as is since it terminates CDATA
  3216. while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
  3217. // skip ]] if we stopped at ]]>, > will go to the next CDATA section
  3218. if (*s) s += 2;
  3219. writer.write_buffer(prev, static_cast<size_t>(s - prev));
  3220. writer.write(']', ']', '>');
  3221. }
  3222. while (*s);
  3223. }
  3224. PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
  3225. {
  3226. switch (indent_length)
  3227. {
  3228. case 1:
  3229. {
  3230. for (unsigned int i = 0; i < depth; ++i)
  3231. writer.write(indent[0]);
  3232. break;
  3233. }
  3234. case 2:
  3235. {
  3236. for (unsigned int i = 0; i < depth; ++i)
  3237. writer.write(indent[0], indent[1]);
  3238. break;
  3239. }
  3240. case 3:
  3241. {
  3242. for (unsigned int i = 0; i < depth; ++i)
  3243. writer.write(indent[0], indent[1], indent[2]);
  3244. break;
  3245. }
  3246. case 4:
  3247. {
  3248. for (unsigned int i = 0; i < depth; ++i)
  3249. writer.write(indent[0], indent[1], indent[2], indent[3]);
  3250. break;
  3251. }
  3252. default:
  3253. {
  3254. for (unsigned int i = 0; i < depth; ++i)
  3255. writer.write_buffer(indent, indent_length);
  3256. }
  3257. }
  3258. }
  3259. PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
  3260. {
  3261. writer.write('<', '!', '-', '-');
  3262. while (*s)
  3263. {
  3264. const char_t* prev = s;
  3265. // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
  3266. while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
  3267. writer.write_buffer(prev, static_cast<size_t>(s - prev));
  3268. if (*s)
  3269. {
  3270. assert(*s == '-');
  3271. writer.write('-', ' ');
  3272. ++s;
  3273. }
  3274. }
  3275. writer.write('-', '-', '>');
  3276. }
  3277. PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
  3278. {
  3279. while (*s)
  3280. {
  3281. const char_t* prev = s;
  3282. // look for ?> sequence - we can't output it since ?> terminates PI
  3283. while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
  3284. writer.write_buffer(prev, static_cast<size_t>(s - prev));
  3285. if (*s)
  3286. {
  3287. assert(s[0] == '?' && s[1] == '>');
  3288. writer.write('?', ' ', '>');
  3289. s += 2;
  3290. }
  3291. }
  3292. }
  3293. PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
  3294. {
  3295. const char_t* default_name = PUGIXML_TEXT(":anonymous");
  3296. const char_t enquotation_char = (flags & format_attribute_single_quote) ? '\'' : '"';
  3297. for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
  3298. {
  3299. if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
  3300. {
  3301. writer.write('\n');
  3302. text_output_indent(writer, indent, indent_length, depth + 1);
  3303. }
  3304. else
  3305. {
  3306. writer.write(' ');
  3307. }
  3308. writer.write_string(a->name ? a->name + 0 : default_name);
  3309. writer.write('=', enquotation_char);
  3310. if (a->value)
  3311. text_output(writer, a->value, ctx_special_attr, flags);
  3312. writer.write(enquotation_char);
  3313. }
  3314. }
  3315. PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
  3316. {
  3317. const char_t* default_name = PUGIXML_TEXT(":anonymous");
  3318. const char_t* name = node->name ? node->name + 0 : default_name;
  3319. writer.write('<');
  3320. writer.write_string(name);
  3321. if (node->first_attribute)
  3322. node_output_attributes(writer, node, indent, indent_length, flags, depth);
  3323. // element nodes can have value if parse_embed_pcdata was used
  3324. if (!node->value)
  3325. {
  3326. if (!node->first_child)
  3327. {
  3328. if (flags & format_no_empty_element_tags)
  3329. {
  3330. writer.write('>', '<', '/');
  3331. writer.write_string(name);
  3332. writer.write('>');
  3333. return false;
  3334. }
  3335. else
  3336. {
  3337. if ((flags & format_raw) == 0)
  3338. writer.write(' ');
  3339. writer.write('/', '>');
  3340. return false;
  3341. }
  3342. }
  3343. else
  3344. {
  3345. writer.write('>');
  3346. return true;
  3347. }
  3348. }
  3349. else
  3350. {
  3351. writer.write('>');
  3352. text_output(writer, node->value, ctx_special_pcdata, flags);
  3353. if (!node->first_child)
  3354. {
  3355. writer.write('<', '/');
  3356. writer.write_string(name);
  3357. writer.write('>');
  3358. return false;
  3359. }
  3360. else
  3361. {
  3362. return true;
  3363. }
  3364. }
  3365. }
  3366. PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
  3367. {
  3368. const char_t* default_name = PUGIXML_TEXT(":anonymous");
  3369. const char_t* name = node->name ? node->name + 0 : default_name;
  3370. writer.write('<', '/');
  3371. writer.write_string(name);
  3372. writer.write('>');
  3373. }
  3374. PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
  3375. {
  3376. const char_t* default_name = PUGIXML_TEXT(":anonymous");
  3377. switch (PUGI__NODETYPE(node))
  3378. {
  3379. case node_pcdata:
  3380. text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
  3381. break;
  3382. case node_cdata:
  3383. text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
  3384. break;
  3385. case node_comment:
  3386. node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
  3387. break;
  3388. case node_pi:
  3389. writer.write('<', '?');
  3390. writer.write_string(node->name ? node->name + 0 : default_name);
  3391. if (node->value)
  3392. {
  3393. writer.write(' ');
  3394. node_output_pi_value(writer, node->value);
  3395. }
  3396. writer.write('?', '>');
  3397. break;
  3398. case node_declaration:
  3399. writer.write('<', '?');
  3400. writer.write_string(node->name ? node->name + 0 : default_name);
  3401. node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
  3402. writer.write('?', '>');
  3403. break;
  3404. case node_doctype:
  3405. writer.write('<', '!', 'D', 'O', 'C');
  3406. writer.write('T', 'Y', 'P', 'E');
  3407. if (node->value)
  3408. {
  3409. writer.write(' ');
  3410. writer.write_string(node->value);
  3411. }
  3412. writer.write('>');
  3413. break;
  3414. default:
  3415. assert(false && "Invalid node type"); // unreachable
  3416. }
  3417. }
  // Bit flags used by node_output to track what has to be written before the next node.
  enum indent_flags_t
  {
      indent_newline = 1 << 0, // a line break is pending
      indent_indent = 1 << 1   // indentation is pending
  };
  3423. PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
  3424. {
  3425. size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
  3426. unsigned int indent_flags = indent_indent;
  3427. xml_node_struct* node = root;
  3428. do
  3429. {
  3430. assert(node);
  3431. // begin writing current node
  3432. if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
  3433. {
  3434. node_output_simple(writer, node, flags);
  3435. indent_flags = 0;
  3436. }
  3437. else
  3438. {
  3439. if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
  3440. writer.write('\n');
  3441. if ((indent_flags & indent_indent) && indent_length)
  3442. text_output_indent(writer, indent, indent_length, depth);
  3443. if (PUGI__NODETYPE(node) == node_element)
  3444. {
  3445. indent_flags = indent_newline | indent_indent;
  3446. if (node_output_start(writer, node, indent, indent_length, flags, depth))
  3447. {
  3448. // element nodes can have value if parse_embed_pcdata was used
  3449. if (node->value)
  3450. indent_flags = 0;
  3451. node = node->first_child;
  3452. depth++;
  3453. continue;
  3454. }
  3455. }
  3456. else if (PUGI__NODETYPE(node) == node_document)
  3457. {
  3458. indent_flags = indent_indent;
  3459. if (node->first_child)
  3460. {
  3461. node = node->first_child;
  3462. continue;
  3463. }
  3464. }
  3465. else
  3466. {
  3467. node_output_simple(writer, node, flags);
  3468. indent_flags = indent_newline | indent_indent;
  3469. }
  3470. }
  3471. // continue to the next node
  3472. while (node != root)
  3473. {
  3474. if (node->next_sibling)
  3475. {
  3476. node = node->next_sibling;
  3477. break;
  3478. }
  3479. node = node->parent;
  3480. // write closing node
  3481. if (PUGI__NODETYPE(node) == node_element)
  3482. {
  3483. depth--;
  3484. if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
  3485. writer.write('\n');
  3486. if ((indent_flags & indent_indent) && indent_length)
  3487. text_output_indent(writer, indent, indent_length, depth);
  3488. node_output_end(writer, node);
  3489. indent_flags = indent_newline | indent_indent;
  3490. }
  3491. }
  3492. }
  3493. while (node != root);
  3494. if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
  3495. writer.write('\n');
  3496. }
  3497. PUGI__FN bool has_declaration(xml_node_struct* node)
  3498. {
  3499. for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
  3500. {
  3501. xml_node_type type = PUGI__NODETYPE(child);
  3502. if (type == node_declaration) return true;
  3503. if (type == node_element) return false;
  3504. }
  3505. return false;
  3506. }
  3507. PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
  3508. {
  3509. for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
  3510. if (a == attr)
  3511. return true;
  3512. return false;
  3513. }
  3514. PUGI__FN bool allow_insert_attribute(xml_node_type parent)
  3515. {
  3516. return parent == node_element || parent == node_declaration;
  3517. }
  3518. PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
  3519. {
  3520. if (parent != node_document && parent != node_element) return false;
  3521. if (child == node_document || child == node_null) return false;
  3522. if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
  3523. return true;
  3524. }
  3525. PUGI__FN bool allow_move(xml_node parent, xml_node child)
  3526. {
  3527. // check that child can be a child of parent
  3528. if (!allow_insert_child(parent.type(), child.type()))
  3529. return false;
  3530. // check that node is not moved between documents
  3531. if (parent.root() != child.root())
  3532. return false;
  3533. // check that new parent is not in the child subtree
  3534. xml_node cur = parent;
  3535. while (cur)
  3536. {
  3537. if (cur == child)
  3538. return false;
  3539. cur = cur.parent();
  3540. }
  3541. return true;
  3542. }
  3543. template <typename String, typename Header>
  3544. PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
  3545. {
  3546. assert(!dest && (header & header_mask) == 0);
  3547. if (source)
  3548. {
  3549. if (alloc && (source_header & header_mask) == 0)
  3550. {
  3551. dest = source;
  3552. // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
  3553. header |= xml_memory_page_contents_shared_mask;
  3554. source_header |= xml_memory_page_contents_shared_mask;
  3555. }
  3556. else
  3557. strcpy_insitu(dest, header, header_mask, source, strlength(source));
  3558. }
  3559. }
  3560. PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
  3561. {
  3562. node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
  3563. node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
  3564. for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
  3565. {
  3566. xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
  3567. if (da)
  3568. {
  3569. node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
  3570. node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
  3571. }
  3572. }
  3573. }
  3574. PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
  3575. {
  3576. xml_allocator& alloc = get_allocator(dn);
  3577. xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
  3578. node_copy_contents(dn, sn, shared_alloc);
  3579. xml_node_struct* dit = dn;
  3580. xml_node_struct* sit = sn->first_child;
  3581. while (sit && sit != sn)
  3582. {
  3583. // loop invariant: dit is inside the subtree rooted at dn
  3584. assert(dit);
  3585. // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop
  3586. if (sit != dn)
  3587. {
  3588. xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
  3589. if (copy)
  3590. {
  3591. node_copy_contents(copy, sit, shared_alloc);
  3592. if (sit->first_child)
  3593. {
  3594. dit = copy;
  3595. sit = sit->first_child;
  3596. continue;
  3597. }
  3598. }
  3599. }
  3600. // continue to the next node
  3601. do
  3602. {
  3603. if (sit->next_sibling)
  3604. {
  3605. sit = sit->next_sibling;
  3606. break;
  3607. }
  3608. sit = sit->parent;
  3609. dit = dit->parent;
  3610. // loop invariant: dit is inside the subtree rooted at dn while sit is inside sn
  3611. assert(sit == sn || dit);
  3612. }
  3613. while (sit != sn);
  3614. }
  3615. assert(!sit || dit == dn->parent);
  3616. }
  3617. PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
  3618. {
  3619. xml_allocator& alloc = get_allocator(da);
  3620. xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
  3621. node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
  3622. node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
  3623. }
  3624. inline bool is_text_node(xml_node_struct* node)
  3625. {
  3626. xml_node_type type = PUGI__NODETYPE(node);
  3627. return type == node_pcdata || type == node_cdata;
  3628. }
  3629. // get value with conversion functions
  3630. template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv)
  3631. {
  3632. U result = 0;
  3633. const char_t* s = value;
  3634. while (PUGI__IS_CHARTYPE(*s, ct_space))
  3635. s++;
  3636. bool negative = (*s == '-');
  3637. s += (*s == '+' || *s == '-');
  3638. bool overflow = false;
  3639. if (s[0] == '0' && (s[1] | ' ') == 'x')
  3640. {
  3641. s += 2;
  3642. // since overflow detection relies on length of the sequence skip leading zeros
  3643. while (*s == '0')
  3644. s++;
  3645. const char_t* start = s;
  3646. for (;;)
  3647. {
  3648. if (static_cast<unsigned>(*s - '0') < 10)
  3649. result = result * 16 + (*s - '0');
  3650. else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
  3651. result = result * 16 + ((*s | ' ') - 'a' + 10);
  3652. else
  3653. break;
  3654. s++;
  3655. }
  3656. size_t digits = static_cast<size_t>(s - start);
  3657. overflow = digits > sizeof(U) * 2;
  3658. }
  3659. else
  3660. {
  3661. // since overflow detection relies on length of the sequence skip leading zeros
  3662. while (*s == '0')
  3663. s++;
  3664. const char_t* start = s;
  3665. for (;;)
  3666. {
  3667. if (static_cast<unsigned>(*s - '0') < 10)
  3668. result = result * 10 + (*s - '0');
  3669. else
  3670. break;
  3671. s++;
  3672. }
  3673. size_t digits = static_cast<size_t>(s - start);
  3674. PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
  3675. const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
  3676. const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
  3677. const size_t high_bit = sizeof(U) * 8 - 1;
  3678. overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
  3679. }
  3680. if (negative)
  3681. {
  3682. // Workaround for crayc++ CC-3059: Expected no overflow in routine.
  3683. #ifdef _CRAYC
  3684. return (overflow || result > ~minv + 1) ? minv : ~result + 1;
  3685. #else
  3686. return (overflow || result > 0 - minv) ? minv : 0 - result;
  3687. #endif
  3688. }
  3689. else
  3690. return (overflow || result > maxv) ? maxv : result;
  3691. }
  3692. PUGI__FN int get_value_int(const char_t* value)
  3693. {
  3694. return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX);
  3695. }
  3696. PUGI__FN unsigned int get_value_uint(const char_t* value)
  3697. {
  3698. return string_to_integer<unsigned int>(value, 0, UINT_MAX);
  3699. }
  3700. PUGI__FN double get_value_double(const char_t* value)
  3701. {
  3702. #ifdef PUGIXML_WCHAR_MODE
  3703. return wcstod(value, 0);
  3704. #else
  3705. return strtod(value, 0);
  3706. #endif
  3707. }
  3708. PUGI__FN float get_value_float(const char_t* value)
  3709. {
  3710. #ifdef PUGIXML_WCHAR_MODE
  3711. return static_cast<float>(wcstod(value, 0));
  3712. #else
  3713. return static_cast<float>(strtod(value, 0));
  3714. #endif
  3715. }
  3716. PUGI__FN bool get_value_bool(const char_t* value)
  3717. {
  3718. // only look at first char
  3719. char_t first = *value;
  3720. // 1*, t* (true), T* (True), y* (yes), Y* (YES)
  3721. return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
  3722. }
  3723. #ifdef PUGIXML_HAS_LONG_LONG
  3724. PUGI__FN long long get_value_llong(const char_t* value)
  3725. {
  3726. return string_to_integer<unsigned long long>(value, static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
  3727. }
  3728. PUGI__FN unsigned long long get_value_ullong(const char_t* value)
  3729. {
  3730. return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
  3731. }
  3732. #endif
  3733. template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
  3734. {
  3735. char_t* result = end - 1;
  3736. U rest = negative ? 0 - value : value;
  3737. do
  3738. {
  3739. *result-- = static_cast<char_t>('0' + (rest % 10));
  3740. rest /= 10;
  3741. }
  3742. while (rest);
  3743. assert(result >= begin);
  3744. (void)begin;
  3745. *result = '-';
  3746. return result + !negative;
  3747. }
  3748. // set value with conversion functions
  3749. template <typename String, typename Header>
  3750. PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
  3751. {
  3752. #ifdef PUGIXML_WCHAR_MODE
  3753. char_t wbuf[128];
  3754. assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
  3755. size_t offset = 0;
  3756. for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
  3757. return strcpy_insitu(dest, header, header_mask, wbuf, offset);
  3758. #else
  3759. return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
  3760. #endif
  3761. }
  3762. template <typename U, typename String, typename Header>
  3763. PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
  3764. {
  3765. char_t buf[64];
  3766. char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
  3767. char_t* begin = integer_to_string(buf, end, value, negative);
  3768. return strcpy_insitu(dest, header, header_mask, begin, end - begin);
  3769. }
  3770. template <typename String, typename Header>
  3771. PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value, int precision)
  3772. {
  3773. char buf[128];
  3774. PUGI__SNPRINTF(buf, "%.*g", precision, double(value));
  3775. return set_value_ascii(dest, header, header_mask, buf);
  3776. }
  3777. template <typename String, typename Header>
  3778. PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value, int precision)
  3779. {
  3780. char buf[128];
  3781. PUGI__SNPRINTF(buf, "%.*g", precision, value);
  3782. return set_value_ascii(dest, header, header_mask, buf);
  3783. }
  3784. template <typename String, typename Header>
  3785. PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
  3786. {
  3787. return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
  3788. }
  3789. PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
  3790. {
  3791. // check input buffer
  3792. if (!contents && size) return make_parse_result(status_io_error);
  3793. // get actual encoding
  3794. xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
  3795. // if convert_buffer below throws bad_alloc, we still need to deallocate contents if we own it
  3796. auto_deleter<void> contents_guard(own ? contents : 0, xml_memory::deallocate);
  3797. // get private buffer
  3798. char_t* buffer = 0;
  3799. size_t length = 0;
  3800. // coverity[var_deref_model]
  3801. if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
  3802. // after this we either deallocate contents (below) or hold on to it via doc->buffer, so we don't need to guard it
  3803. contents_guard.release();
  3804. // delete original buffer if we performed a conversion
  3805. if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
  3806. // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
  3807. if (own || buffer != contents) *out_buffer = buffer;
  3808. // store buffer for offset_debug
  3809. doc->buffer = buffer;
  3810. // parse
  3811. xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
  3812. // remember encoding
  3813. res.encoding = buffer_encoding;
  3814. return res;
  3815. }
  3816. // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
  3817. PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
  3818. {
  3819. #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
  3820. // there are 64-bit versions of fseek/ftell, let's use them
  3821. typedef __int64 length_type;
  3822. _fseeki64(file, 0, SEEK_END);
  3823. length_type length = _ftelli64(file);
  3824. _fseeki64(file, 0, SEEK_SET);
  3825. #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
  3826. // there are 64-bit versions of fseek/ftell, let's use them
  3827. typedef off64_t length_type;
  3828. fseeko64(file, 0, SEEK_END);
  3829. length_type length = ftello64(file);
  3830. fseeko64(file, 0, SEEK_SET);
  3831. #else
  3832. // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
  3833. typedef long length_type;
  3834. fseek(file, 0, SEEK_END);
  3835. length_type length = ftell(file);
  3836. fseek(file, 0, SEEK_SET);
  3837. #endif
  3838. // check for I/O errors
  3839. if (length < 0) return status_io_error;
  3840. // check for overflow
  3841. size_t result = static_cast<size_t>(length);
  3842. if (static_cast<length_type>(result) != length) return status_out_of_memory;
  3843. // finalize
  3844. out_result = result;
  3845. return status_ok;
  3846. }
  3847. // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
  3848. PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
  3849. {
  3850. // We only need to zero-terminate if encoding conversion does not do it for us
  3851. #ifdef PUGIXML_WCHAR_MODE
  3852. xml_encoding wchar_encoding = get_wchar_encoding();
  3853. if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
  3854. {
  3855. size_t length = size / sizeof(char_t);
  3856. static_cast<char_t*>(buffer)[length] = 0;
  3857. return (length + 1) * sizeof(char_t);
  3858. }
  3859. #else
  3860. if (encoding == encoding_utf8)
  3861. {
  3862. static_cast<char*>(buffer)[size] = 0;
  3863. return size + 1;
  3864. }
  3865. #endif
  3866. return size;
  3867. }
  3868. PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
  3869. {
  3870. if (!file) return make_parse_result(status_file_not_found);
  3871. // get file size (can result in I/O errors)
  3872. size_t size = 0;
  3873. xml_parse_status size_status = get_file_size(file, size);
  3874. if (size_status != status_ok) return make_parse_result(size_status);
  3875. size_t max_suffix_size = sizeof(char_t);
  3876. // allocate buffer for the whole file
  3877. char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
  3878. if (!contents) return make_parse_result(status_out_of_memory);
  3879. // read file in memory
  3880. size_t read_size = fread(contents, 1, size, file);
  3881. if (read_size != size)
  3882. {
  3883. xml_memory::deallocate(contents);
  3884. return make_parse_result(status_io_error);
  3885. }
  3886. xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
  3887. return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
  3888. }
  3889. PUGI__FN void close_file(FILE* file)
  3890. {
  3891. fclose(file);
  3892. }
  3893. #ifndef PUGIXML_NO_STL
  3894. template <typename T> struct xml_stream_chunk
  3895. {
  3896. static xml_stream_chunk* create()
  3897. {
  3898. void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
  3899. if (!memory) return 0;
  3900. return new (memory) xml_stream_chunk();
  3901. }
  3902. static void destroy(xml_stream_chunk* chunk)
  3903. {
  3904. // free chunk chain
  3905. while (chunk)
  3906. {
  3907. xml_stream_chunk* next_ = chunk->next;
  3908. xml_memory::deallocate(chunk);
  3909. chunk = next_;
  3910. }
  3911. }
  3912. xml_stream_chunk(): next(0), size(0)
  3913. {
  3914. }
  3915. xml_stream_chunk* next;
  3916. size_t size;
  3917. T data[xml_memory_page_size / sizeof(T)];
  3918. };
  3919. template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
  3920. {
  3921. auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
  3922. // read file to a chunk list
  3923. size_t total = 0;
  3924. xml_stream_chunk<T>* last = 0;
  3925. while (!stream.eof())
  3926. {
  3927. // allocate new chunk
  3928. xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
  3929. if (!chunk) return status_out_of_memory;
  3930. // append chunk to list
  3931. if (last) last = last->next = chunk;
  3932. else chunks.data = last = chunk;
  3933. // read data to chunk
  3934. stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
  3935. chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
  3936. // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
  3937. if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
  3938. // guard against huge files (chunk size is small enough to make this overflow check work)
  3939. if (total + chunk->size < total) return status_out_of_memory;
  3940. total += chunk->size;
  3941. }
  3942. size_t max_suffix_size = sizeof(char_t);
  3943. // copy chunk list to a contiguous buffer
  3944. char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
  3945. if (!buffer) return status_out_of_memory;
  3946. char* write = buffer;
  3947. for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
  3948. {
  3949. assert(write + chunk->size <= buffer + total);
  3950. memcpy(write, chunk->data, chunk->size);
  3951. write += chunk->size;
  3952. }
  3953. assert(write == buffer + total);
  3954. // return buffer
  3955. *out_buffer = buffer;
  3956. *out_size = total;
  3957. return status_ok;
  3958. }
  3959. template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
  3960. {
  3961. // get length of remaining data in stream
  3962. typename std::basic_istream<T>::pos_type pos = stream.tellg();
  3963. stream.seekg(0, std::ios::end);
  3964. std::streamoff length = stream.tellg() - pos;
  3965. stream.seekg(pos);
  3966. if (stream.fail() || pos < 0) return status_io_error;
  3967. // guard against huge files
  3968. size_t read_length = static_cast<size_t>(length);
  3969. if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
  3970. size_t max_suffix_size = sizeof(char_t);
  3971. // read stream data into memory (guard against stream exceptions with buffer holder)
  3972. auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
  3973. if (!buffer.data) return status_out_of_memory;
  3974. stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
  3975. // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
  3976. if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
  3977. // return buffer
  3978. size_t actual_length = static_cast<size_t>(stream.gcount());
  3979. assert(actual_length <= read_length);
  3980. *out_buffer = buffer.release();
  3981. *out_size = actual_length * sizeof(T);
  3982. return status_ok;
  3983. }
  3984. template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
  3985. {
  3986. void* buffer = 0;
  3987. size_t size = 0;
  3988. xml_parse_status status = status_ok;
  3989. // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
  3990. if (stream.fail()) return make_parse_result(status_io_error);
  3991. // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
  3992. if (stream.tellg() < 0)
  3993. {
  3994. stream.clear(); // clear error flags that could be set by a failing tellg
  3995. status = load_stream_data_noseek(stream, &buffer, &size);
  3996. }
  3997. else
  3998. status = load_stream_data_seek(stream, &buffer, &size);
  3999. if (status != status_ok) return make_parse_result(status);
  4000. xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
  4001. return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
  4002. }
  4003. #endif
  4004. #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
  4005. PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
  4006. {
  4007. #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
  4008. FILE* file = 0;
  4009. return _wfopen_s(&file, path, mode) == 0 ? file : 0;
  4010. #else
  4011. return _wfopen(path, mode);
  4012. #endif
  4013. }
  4014. #else
  4015. PUGI__FN char* convert_path_heap(const wchar_t* str)
  4016. {
  4017. assert(str);
  4018. // first pass: get length in utf8 characters
  4019. size_t length = strlength_wide(str);
  4020. size_t size = as_utf8_begin(str, length);
  4021. // allocate resulting string
  4022. char* result = static_cast<char*>(xml_memory::allocate(size + 1));
  4023. if (!result) return 0;
  4024. // second pass: convert to utf8
  4025. as_utf8_end(result, size, str, length);
  4026. // zero-terminate
  4027. result[size] = 0;
  4028. return result;
  4029. }
  4030. PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
  4031. {
  4032. // there is no standard function to open wide paths, so our best bet is to try utf8 path
  4033. char* path_utf8 = convert_path_heap(path);
  4034. if (!path_utf8) return 0;
  4035. // convert mode to ASCII (we mirror _wfopen interface)
  4036. char mode_ascii[4] = {0};
  4037. for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
  4038. // try to open the utf8 path
  4039. FILE* result = fopen(path_utf8, mode_ascii);
  4040. // free dummy buffer
  4041. xml_memory::deallocate(path_utf8);
  4042. return result;
  4043. }
  4044. #endif
  4045. PUGI__FN FILE* open_file(const char* path, const char* mode)
  4046. {
  4047. #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
  4048. FILE* file = 0;
  4049. return fopen_s(&file, path, mode) == 0 ? file : 0;
  4050. #else
  4051. return fopen(path, mode);
  4052. #endif
  4053. }
  4054. PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
  4055. {
  4056. if (!file) return false;
  4057. xml_writer_file writer(file);
  4058. doc.save(writer, indent, flags, encoding);
  4059. return fflush(file) == 0 && ferror(file) == 0;
  4060. }
  4061. struct name_null_sentry
  4062. {
  4063. xml_node_struct* node;
  4064. char_t* name;
  4065. name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
  4066. {
  4067. node->name = 0;
  4068. }
  4069. ~name_null_sentry()
  4070. {
  4071. node->name = name;
  4072. }
  4073. };
  4074. PUGI__NS_END
  4075. namespace pugi
  4076. {
  4077. PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
  4078. {
  4079. }
  4080. PUGI__FN void xml_writer_file::write(const void* data, size_t size)
  4081. {
  4082. size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
  4083. (void)!result; // unfortunately we can't do proper error handling here
  4084. }
  4085. #ifndef PUGIXML_NO_STL
  4086. PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
  4087. {
  4088. }
  4089. PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
  4090. {
  4091. }
  4092. PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
  4093. {
  4094. if (narrow_stream)
  4095. {
  4096. assert(!wide_stream);
  4097. narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
  4098. }
  4099. else
  4100. {
  4101. assert(wide_stream);
  4102. assert(size % sizeof(wchar_t) == 0);
  4103. wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
  4104. }
  4105. }
  4106. #endif
  4107. PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
  4108. {
  4109. }
  4110. PUGI__FN xml_tree_walker::~xml_tree_walker()
  4111. {
  4112. }
  4113. PUGI__FN int xml_tree_walker::depth() const
  4114. {
  4115. return _depth;
  4116. }
  4117. PUGI__FN bool xml_tree_walker::begin(xml_node&)
  4118. {
  4119. return true;
  4120. }
  4121. PUGI__FN bool xml_tree_walker::end(xml_node&)
  4122. {
  4123. return true;
  4124. }
  4125. PUGI__FN xml_attribute::xml_attribute(): _attr(0)
  4126. {
  4127. }
  4128. PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
  4129. {
  4130. }
  4131. PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
  4132. {
  4133. }
  4134. PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
  4135. {
  4136. return _attr ? unspecified_bool_xml_attribute : 0;
  4137. }
  4138. PUGI__FN bool xml_attribute::operator!() const
  4139. {
  4140. return !_attr;
  4141. }
  4142. PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
  4143. {
  4144. return (_attr == r._attr);
  4145. }
  4146. PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
  4147. {
  4148. return (_attr != r._attr);
  4149. }
  4150. PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
  4151. {
  4152. return (_attr < r._attr);
  4153. }
  4154. PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
  4155. {
  4156. return (_attr > r._attr);
  4157. }
  4158. PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
  4159. {
  4160. return (_attr <= r._attr);
  4161. }
  4162. PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
  4163. {
  4164. return (_attr >= r._attr);
  4165. }
  4166. PUGI__FN xml_attribute xml_attribute::next_attribute() const
  4167. {
  4168. if (!_attr) return xml_attribute();
  4169. return xml_attribute(_attr->next_attribute);
  4170. }
  4171. PUGI__FN xml_attribute xml_attribute::previous_attribute() const
  4172. {
  4173. if (!_attr) return xml_attribute();
  4174. xml_attribute_struct* prev = _attr->prev_attribute_c;
  4175. return prev->next_attribute ? xml_attribute(prev) : xml_attribute();
  4176. }
  4177. PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
  4178. {
  4179. if (!_attr) return def;
  4180. const char_t* value = _attr->value;
  4181. return value ? value : def;
  4182. }
  4183. PUGI__FN int xml_attribute::as_int(int def) const
  4184. {
  4185. if (!_attr) return def;
  4186. const char_t* value = _attr->value;
  4187. return value ? impl::get_value_int(value) : def;
  4188. }
  4189. PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
  4190. {
  4191. if (!_attr) return def;
  4192. const char_t* value = _attr->value;
  4193. return value ? impl::get_value_uint(value) : def;
  4194. }
  4195. PUGI__FN double xml_attribute::as_double(double def) const
  4196. {
  4197. if (!_attr) return def;
  4198. const char_t* value = _attr->value;
  4199. return value ? impl::get_value_double(value) : def;
  4200. }
  4201. PUGI__FN float xml_attribute::as_float(float def) const
  4202. {
  4203. if (!_attr) return def;
  4204. const char_t* value = _attr->value;
  4205. return value ? impl::get_value_float(value) : def;
  4206. }
  4207. PUGI__FN bool xml_attribute::as_bool(bool def) const
  4208. {
  4209. if (!_attr) return def;
  4210. const char_t* value = _attr->value;
  4211. return value ? impl::get_value_bool(value) : def;
  4212. }
  4213. #ifdef PUGIXML_HAS_LONG_LONG
  4214. PUGI__FN long long xml_attribute::as_llong(long long def) const
  4215. {
  4216. if (!_attr) return def;
  4217. const char_t* value = _attr->value;
  4218. return value ? impl::get_value_llong(value) : def;
  4219. }
  4220. PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
  4221. {
  4222. if (!_attr) return def;
  4223. const char_t* value = _attr->value;
  4224. return value ? impl::get_value_ullong(value) : def;
  4225. }
  4226. #endif
  4227. PUGI__FN bool xml_attribute::empty() const
  4228. {
  4229. return !_attr;
  4230. }
  4231. PUGI__FN const char_t* xml_attribute::name() const
  4232. {
  4233. if (!_attr) return PUGIXML_TEXT("");
  4234. const char_t* name = _attr->name;
  4235. return name ? name : PUGIXML_TEXT("");
  4236. }
  4237. PUGI__FN const char_t* xml_attribute::value() const
  4238. {
  4239. if (!_attr) return PUGIXML_TEXT("");
  4240. const char_t* value = _attr->value;
  4241. return value ? value : PUGIXML_TEXT("");
  4242. }
  4243. PUGI__FN size_t xml_attribute::hash_value() const
  4244. {
  4245. return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
  4246. }
  4247. PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
  4248. {
  4249. return _attr;
  4250. }
  4251. PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
  4252. {
  4253. set_value(rhs);
  4254. return *this;
  4255. }
  4256. PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
  4257. {
  4258. set_value(rhs);
  4259. return *this;
  4260. }
  4261. PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
  4262. {
  4263. set_value(rhs);
  4264. return *this;
  4265. }
  4266. PUGI__FN xml_attribute& xml_attribute::operator=(long rhs)
  4267. {
  4268. set_value(rhs);
  4269. return *this;
  4270. }
  4271. PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
  4272. {
  4273. set_value(rhs);
  4274. return *this;
  4275. }
  4276. PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
  4277. {
  4278. set_value(rhs);
  4279. return *this;
  4280. }
  4281. PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
  4282. {
  4283. set_value(rhs);
  4284. return *this;
  4285. }
  4286. PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
  4287. {
  4288. set_value(rhs);
  4289. return *this;
  4290. }
  4291. #ifdef PUGIXML_HAS_LONG_LONG
  4292. PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
  4293. {
  4294. set_value(rhs);
  4295. return *this;
  4296. }
  4297. PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
  4298. {
  4299. set_value(rhs);
  4300. return *this;
  4301. }
  4302. #endif
  4303. PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
  4304. {
  4305. if (!_attr) return false;
  4306. return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
  4307. }
  4308. PUGI__FN bool xml_attribute::set_value(const char_t* rhs, size_t sz)
  4309. {
  4310. if (!_attr) return false;
  4311. return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, sz);
  4312. }
  4313. PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
  4314. {
  4315. if (!_attr) return false;
  4316. return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
  4317. }
  4318. PUGI__FN bool xml_attribute::set_value(int rhs)
  4319. {
  4320. if (!_attr) return false;
  4321. return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
  4322. }
  4323. PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
  4324. {
  4325. if (!_attr) return false;
  4326. return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
  4327. }
  4328. PUGI__FN bool xml_attribute::set_value(long rhs)
  4329. {
  4330. if (!_attr) return false;
  4331. return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
  4332. }
  4333. PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
  4334. {
  4335. if (!_attr) return false;
  4336. return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
  4337. }
  4338. PUGI__FN bool xml_attribute::set_value(double rhs)
  4339. {
  4340. if (!_attr) return false;
  4341. return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision);
  4342. }
  4343. PUGI__FN bool xml_attribute::set_value(double rhs, int precision)
  4344. {
  4345. if (!_attr) return false;
  4346. return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
  4347. }
  4348. PUGI__FN bool xml_attribute::set_value(float rhs)
  4349. {
  4350. if (!_attr) return false;
  4351. return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision);
  4352. }
  4353. PUGI__FN bool xml_attribute::set_value(float rhs, int precision)
  4354. {
  4355. if (!_attr) return false;
  4356. return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
  4357. }
  4358. PUGI__FN bool xml_attribute::set_value(bool rhs)
  4359. {
  4360. if (!_attr) return false;
  4361. return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
  4362. }
  4363. #ifdef PUGIXML_HAS_LONG_LONG
  4364. PUGI__FN bool xml_attribute::set_value(long long rhs)
  4365. {
  4366. if (!_attr) return false;
  4367. return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
  4368. }
  4369. PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
  4370. {
  4371. if (!_attr) return false;
  4372. return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
  4373. }
  4374. #endif
  4375. #ifdef __BORLANDC__
  4376. PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
  4377. {
  4378. return (bool)lhs && rhs;
  4379. }
  4380. PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
  4381. {
  4382. return (bool)lhs || rhs;
  4383. }
  4384. #endif
  4385. PUGI__FN xml_node::xml_node(): _root(0)
  4386. {
  4387. }
  4388. PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
  4389. {
  4390. }
  4391. PUGI__FN static void unspecified_bool_xml_node(xml_node***)
  4392. {
  4393. }
  4394. PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
  4395. {
  4396. return _root ? unspecified_bool_xml_node : 0;
  4397. }
  4398. PUGI__FN bool xml_node::operator!() const
  4399. {
  4400. return !_root;
  4401. }
  4402. PUGI__FN xml_node::iterator xml_node::begin() const
  4403. {
  4404. return iterator(_root ? _root->first_child + 0 : 0, _root);
  4405. }
  4406. PUGI__FN xml_node::iterator xml_node::end() const
  4407. {
  4408. return iterator(0, _root);
  4409. }
  4410. PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
  4411. {
  4412. return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
  4413. }
  4414. PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
  4415. {
  4416. return attribute_iterator(0, _root);
  4417. }
  4418. PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
  4419. {
  4420. return xml_object_range<xml_node_iterator>(begin(), end());
  4421. }
  4422. PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
  4423. {
  4424. return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
  4425. }
  4426. PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
  4427. {
  4428. return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
  4429. }
  4430. PUGI__FN bool xml_node::operator==(const xml_node& r) const
  4431. {
  4432. return (_root == r._root);
  4433. }
  4434. PUGI__FN bool xml_node::operator!=(const xml_node& r) const
  4435. {
  4436. return (_root != r._root);
  4437. }
  4438. PUGI__FN bool xml_node::operator<(const xml_node& r) const
  4439. {
  4440. return (_root < r._root);
  4441. }
  4442. PUGI__FN bool xml_node::operator>(const xml_node& r) const
  4443. {
  4444. return (_root > r._root);
  4445. }
  4446. PUGI__FN bool xml_node::operator<=(const xml_node& r) const
  4447. {
  4448. return (_root <= r._root);
  4449. }
  4450. PUGI__FN bool xml_node::operator>=(const xml_node& r) const
  4451. {
  4452. return (_root >= r._root);
  4453. }
  4454. PUGI__FN bool xml_node::empty() const
  4455. {
  4456. return !_root;
  4457. }
  4458. PUGI__FN const char_t* xml_node::name() const
  4459. {
  4460. if (!_root) return PUGIXML_TEXT("");
  4461. const char_t* name = _root->name;
  4462. return name ? name : PUGIXML_TEXT("");
  4463. }
  4464. PUGI__FN xml_node_type xml_node::type() const
  4465. {
  4466. return _root ? PUGI__NODETYPE(_root) : node_null;
  4467. }
  4468. PUGI__FN const char_t* xml_node::value() const
  4469. {
  4470. if (!_root) return PUGIXML_TEXT("");
  4471. const char_t* value = _root->value;
  4472. return value ? value : PUGIXML_TEXT("");
  4473. }
  4474. PUGI__FN xml_node xml_node::child(const char_t* name_) const
  4475. {
  4476. if (!_root) return xml_node();
  4477. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  4478. {
  4479. const char_t* iname = i->name;
  4480. if (iname && impl::strequal(name_, iname))
  4481. return xml_node(i);
  4482. }
  4483. return xml_node();
  4484. }
  4485. PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
  4486. {
  4487. if (!_root) return xml_attribute();
  4488. for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
  4489. {
  4490. const char_t* iname = i->name;
  4491. if (iname && impl::strequal(name_, iname))
  4492. return xml_attribute(i);
  4493. }
  4494. return xml_attribute();
  4495. }
  4496. PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
  4497. {
  4498. if (!_root) return xml_node();
  4499. for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
  4500. {
  4501. const char_t* iname = i->name;
  4502. if (iname && impl::strequal(name_, iname))
  4503. return xml_node(i);
  4504. }
  4505. return xml_node();
  4506. }
  4507. PUGI__FN xml_node xml_node::next_sibling() const
  4508. {
  4509. return _root ? xml_node(_root->next_sibling) : xml_node();
  4510. }
  4511. PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
  4512. {
  4513. if (!_root) return xml_node();
  4514. for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
  4515. {
  4516. const char_t* iname = i->name;
  4517. if (iname && impl::strequal(name_, iname))
  4518. return xml_node(i);
  4519. }
  4520. return xml_node();
  4521. }
  4522. PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
  4523. {
  4524. xml_attribute_struct* hint = hint_._attr;
  4525. // if hint is not an attribute of node, behavior is not defined
  4526. assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
  4527. if (!_root) return xml_attribute();
  4528. // optimistically search from hint up until the end
  4529. for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
  4530. {
  4531. const char_t* iname = i->name;
  4532. if (iname && impl::strequal(name_, iname))
  4533. {
  4534. // update hint to maximize efficiency of searching for consecutive attributes
  4535. hint_._attr = i->next_attribute;
  4536. return xml_attribute(i);
  4537. }
  4538. }
  4539. // wrap around and search from the first attribute until the hint
  4540. // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
  4541. for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
  4542. {
  4543. const char_t* jname = j->name;
  4544. if (jname && impl::strequal(name_, jname))
  4545. {
  4546. // update hint to maximize efficiency of searching for consecutive attributes
  4547. hint_._attr = j->next_attribute;
  4548. return xml_attribute(j);
  4549. }
  4550. }
  4551. return xml_attribute();
  4552. }
  4553. PUGI__FN xml_node xml_node::previous_sibling() const
  4554. {
  4555. if (!_root) return xml_node();
  4556. xml_node_struct* prev = _root->prev_sibling_c;
  4557. return prev->next_sibling ? xml_node(prev) : xml_node();
  4558. }
  4559. PUGI__FN xml_node xml_node::parent() const
  4560. {
  4561. return _root ? xml_node(_root->parent) : xml_node();
  4562. }
  4563. PUGI__FN xml_node xml_node::root() const
  4564. {
  4565. return _root ? xml_node(&impl::get_document(_root)) : xml_node();
  4566. }
  4567. PUGI__FN xml_text xml_node::text() const
  4568. {
  4569. return xml_text(_root);
  4570. }
  4571. PUGI__FN const char_t* xml_node::child_value() const
  4572. {
  4573. if (!_root) return PUGIXML_TEXT("");
  4574. // element nodes can have value if parse_embed_pcdata was used
  4575. if (PUGI__NODETYPE(_root) == node_element && _root->value)
  4576. return _root->value;
  4577. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  4578. {
  4579. const char_t* ivalue = i->value;
  4580. if (impl::is_text_node(i) && ivalue)
  4581. return ivalue;
  4582. }
  4583. return PUGIXML_TEXT("");
  4584. }
  4585. PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
  4586. {
  4587. return child(name_).child_value();
  4588. }
  4589. PUGI__FN xml_attribute xml_node::first_attribute() const
  4590. {
  4591. if (!_root) return xml_attribute();
  4592. return xml_attribute(_root->first_attribute);
  4593. }
  4594. PUGI__FN xml_attribute xml_node::last_attribute() const
  4595. {
  4596. if (!_root) return xml_attribute();
  4597. xml_attribute_struct* first = _root->first_attribute;
  4598. return first ? xml_attribute(first->prev_attribute_c) : xml_attribute();
  4599. }
  4600. PUGI__FN xml_node xml_node::first_child() const
  4601. {
  4602. if (!_root) return xml_node();
  4603. return xml_node(_root->first_child);
  4604. }
  4605. PUGI__FN xml_node xml_node::last_child() const
  4606. {
  4607. if (!_root) return xml_node();
  4608. xml_node_struct* first = _root->first_child;
  4609. return first ? xml_node(first->prev_sibling_c) : xml_node();
  4610. }
  4611. PUGI__FN bool xml_node::set_name(const char_t* rhs)
  4612. {
  4613. xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
  4614. if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
  4615. return false;
  4616. return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
  4617. }
  4618. PUGI__FN bool xml_node::set_value(const char_t* rhs, size_t sz)
  4619. {
  4620. xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
  4621. if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
  4622. return false;
  4623. return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, sz);
  4624. }
  4625. PUGI__FN bool xml_node::set_value(const char_t* rhs)
  4626. {
  4627. xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
  4628. if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
  4629. return false;
  4630. return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
  4631. }
  4632. PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
  4633. {
  4634. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4635. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4636. if (!alloc.reserve()) return xml_attribute();
  4637. xml_attribute a(impl::allocate_attribute(alloc));
  4638. if (!a) return xml_attribute();
  4639. impl::append_attribute(a._attr, _root);
  4640. a.set_name(name_);
  4641. return a;
  4642. }
  4643. PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
  4644. {
  4645. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4646. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4647. if (!alloc.reserve()) return xml_attribute();
  4648. xml_attribute a(impl::allocate_attribute(alloc));
  4649. if (!a) return xml_attribute();
  4650. impl::prepend_attribute(a._attr, _root);
  4651. a.set_name(name_);
  4652. return a;
  4653. }
  4654. PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
  4655. {
  4656. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4657. if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
  4658. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4659. if (!alloc.reserve()) return xml_attribute();
  4660. xml_attribute a(impl::allocate_attribute(alloc));
  4661. if (!a) return xml_attribute();
  4662. impl::insert_attribute_after(a._attr, attr._attr, _root);
  4663. a.set_name(name_);
  4664. return a;
  4665. }
  4666. PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
  4667. {
  4668. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4669. if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
  4670. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4671. if (!alloc.reserve()) return xml_attribute();
  4672. xml_attribute a(impl::allocate_attribute(alloc));
  4673. if (!a) return xml_attribute();
  4674. impl::insert_attribute_before(a._attr, attr._attr, _root);
  4675. a.set_name(name_);
  4676. return a;
  4677. }
  4678. PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
  4679. {
  4680. if (!proto) return xml_attribute();
  4681. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4682. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4683. if (!alloc.reserve()) return xml_attribute();
  4684. xml_attribute a(impl::allocate_attribute(alloc));
  4685. if (!a) return xml_attribute();
  4686. impl::append_attribute(a._attr, _root);
  4687. impl::node_copy_attribute(a._attr, proto._attr);
  4688. return a;
  4689. }
  4690. PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
  4691. {
  4692. if (!proto) return xml_attribute();
  4693. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4694. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4695. if (!alloc.reserve()) return xml_attribute();
  4696. xml_attribute a(impl::allocate_attribute(alloc));
  4697. if (!a) return xml_attribute();
  4698. impl::prepend_attribute(a._attr, _root);
  4699. impl::node_copy_attribute(a._attr, proto._attr);
  4700. return a;
  4701. }
  4702. PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
  4703. {
  4704. if (!proto) return xml_attribute();
  4705. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4706. if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
  4707. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4708. if (!alloc.reserve()) return xml_attribute();
  4709. xml_attribute a(impl::allocate_attribute(alloc));
  4710. if (!a) return xml_attribute();
  4711. impl::insert_attribute_after(a._attr, attr._attr, _root);
  4712. impl::node_copy_attribute(a._attr, proto._attr);
  4713. return a;
  4714. }
  4715. PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
  4716. {
  4717. if (!proto) return xml_attribute();
  4718. if (!impl::allow_insert_attribute(type())) return xml_attribute();
  4719. if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
  4720. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4721. if (!alloc.reserve()) return xml_attribute();
  4722. xml_attribute a(impl::allocate_attribute(alloc));
  4723. if (!a) return xml_attribute();
  4724. impl::insert_attribute_before(a._attr, attr._attr, _root);
  4725. impl::node_copy_attribute(a._attr, proto._attr);
  4726. return a;
  4727. }
  4728. PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
  4729. {
  4730. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4731. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4732. if (!alloc.reserve()) return xml_node();
  4733. xml_node n(impl::allocate_node(alloc, type_));
  4734. if (!n) return xml_node();
  4735. impl::append_node(n._root, _root);
  4736. if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
  4737. return n;
  4738. }
  4739. PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
  4740. {
  4741. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4742. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4743. if (!alloc.reserve()) return xml_node();
  4744. xml_node n(impl::allocate_node(alloc, type_));
  4745. if (!n) return xml_node();
  4746. impl::prepend_node(n._root, _root);
  4747. if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
  4748. return n;
  4749. }
  4750. PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
  4751. {
  4752. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4753. if (!node._root || node._root->parent != _root) return xml_node();
  4754. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4755. if (!alloc.reserve()) return xml_node();
  4756. xml_node n(impl::allocate_node(alloc, type_));
  4757. if (!n) return xml_node();
  4758. impl::insert_node_before(n._root, node._root);
  4759. if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
  4760. return n;
  4761. }
  4762. PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
  4763. {
  4764. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4765. if (!node._root || node._root->parent != _root) return xml_node();
  4766. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4767. if (!alloc.reserve()) return xml_node();
  4768. xml_node n(impl::allocate_node(alloc, type_));
  4769. if (!n) return xml_node();
  4770. impl::insert_node_after(n._root, node._root);
  4771. if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
  4772. return n;
  4773. }
  4774. PUGI__FN xml_node xml_node::append_child(const char_t* name_)
  4775. {
  4776. xml_node result = append_child(node_element);
  4777. result.set_name(name_);
  4778. return result;
  4779. }
  4780. PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
  4781. {
  4782. xml_node result = prepend_child(node_element);
  4783. result.set_name(name_);
  4784. return result;
  4785. }
  4786. PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
  4787. {
  4788. xml_node result = insert_child_after(node_element, node);
  4789. result.set_name(name_);
  4790. return result;
  4791. }
  4792. PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
  4793. {
  4794. xml_node result = insert_child_before(node_element, node);
  4795. result.set_name(name_);
  4796. return result;
  4797. }
  4798. PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
  4799. {
  4800. xml_node_type type_ = proto.type();
  4801. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4802. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4803. if (!alloc.reserve()) return xml_node();
  4804. xml_node n(impl::allocate_node(alloc, type_));
  4805. if (!n) return xml_node();
  4806. impl::append_node(n._root, _root);
  4807. impl::node_copy_tree(n._root, proto._root);
  4808. return n;
  4809. }
  4810. PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
  4811. {
  4812. xml_node_type type_ = proto.type();
  4813. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4814. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4815. if (!alloc.reserve()) return xml_node();
  4816. xml_node n(impl::allocate_node(alloc, type_));
  4817. if (!n) return xml_node();
  4818. impl::prepend_node(n._root, _root);
  4819. impl::node_copy_tree(n._root, proto._root);
  4820. return n;
  4821. }
  4822. PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
  4823. {
  4824. xml_node_type type_ = proto.type();
  4825. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4826. if (!node._root || node._root->parent != _root) return xml_node();
  4827. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4828. if (!alloc.reserve()) return xml_node();
  4829. xml_node n(impl::allocate_node(alloc, type_));
  4830. if (!n) return xml_node();
  4831. impl::insert_node_after(n._root, node._root);
  4832. impl::node_copy_tree(n._root, proto._root);
  4833. return n;
  4834. }
  4835. PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
  4836. {
  4837. xml_node_type type_ = proto.type();
  4838. if (!impl::allow_insert_child(type(), type_)) return xml_node();
  4839. if (!node._root || node._root->parent != _root) return xml_node();
  4840. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4841. if (!alloc.reserve()) return xml_node();
  4842. xml_node n(impl::allocate_node(alloc, type_));
  4843. if (!n) return xml_node();
  4844. impl::insert_node_before(n._root, node._root);
  4845. impl::node_copy_tree(n._root, proto._root);
  4846. return n;
  4847. }
  4848. PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
  4849. {
  4850. if (!impl::allow_move(*this, moved)) return xml_node();
  4851. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4852. if (!alloc.reserve()) return xml_node();
  4853. // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
  4854. impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
  4855. impl::remove_node(moved._root);
  4856. impl::append_node(moved._root, _root);
  4857. return moved;
  4858. }
  4859. PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
  4860. {
  4861. if (!impl::allow_move(*this, moved)) return xml_node();
  4862. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4863. if (!alloc.reserve()) return xml_node();
  4864. // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
  4865. impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
  4866. impl::remove_node(moved._root);
  4867. impl::prepend_node(moved._root, _root);
  4868. return moved;
  4869. }
  4870. PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
  4871. {
  4872. if (!impl::allow_move(*this, moved)) return xml_node();
  4873. if (!node._root || node._root->parent != _root) return xml_node();
  4874. if (moved._root == node._root) return xml_node();
  4875. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4876. if (!alloc.reserve()) return xml_node();
  4877. // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
  4878. impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
  4879. impl::remove_node(moved._root);
  4880. impl::insert_node_after(moved._root, node._root);
  4881. return moved;
  4882. }
  4883. PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
  4884. {
  4885. if (!impl::allow_move(*this, moved)) return xml_node();
  4886. if (!node._root || node._root->parent != _root) return xml_node();
  4887. if (moved._root == node._root) return xml_node();
  4888. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4889. if (!alloc.reserve()) return xml_node();
  4890. // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
  4891. impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
  4892. impl::remove_node(moved._root);
  4893. impl::insert_node_before(moved._root, node._root);
  4894. return moved;
  4895. }
  4896. PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
  4897. {
  4898. return remove_attribute(attribute(name_));
  4899. }
  4900. PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
  4901. {
  4902. if (!_root || !a._attr) return false;
  4903. if (!impl::is_attribute_of(a._attr, _root)) return false;
  4904. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4905. if (!alloc.reserve()) return false;
  4906. impl::remove_attribute(a._attr, _root);
  4907. impl::destroy_attribute(a._attr, alloc);
  4908. return true;
  4909. }
  4910. PUGI__FN bool xml_node::remove_attributes()
  4911. {
  4912. if (!_root) return false;
  4913. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4914. if (!alloc.reserve()) return false;
  4915. for (xml_attribute_struct* attr = _root->first_attribute; attr; )
  4916. {
  4917. xml_attribute_struct* next = attr->next_attribute;
  4918. impl::destroy_attribute(attr, alloc);
  4919. attr = next;
  4920. }
  4921. _root->first_attribute = 0;
  4922. return true;
  4923. }
  4924. PUGI__FN bool xml_node::remove_child(const char_t* name_)
  4925. {
  4926. return remove_child(child(name_));
  4927. }
  4928. PUGI__FN bool xml_node::remove_child(const xml_node& n)
  4929. {
  4930. if (!_root || !n._root || n._root->parent != _root) return false;
  4931. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4932. if (!alloc.reserve()) return false;
  4933. impl::remove_node(n._root);
  4934. impl::destroy_node(n._root, alloc);
  4935. return true;
  4936. }
  4937. PUGI__FN bool xml_node::remove_children()
  4938. {
  4939. if (!_root) return false;
  4940. impl::xml_allocator& alloc = impl::get_allocator(_root);
  4941. if (!alloc.reserve()) return false;
  4942. for (xml_node_struct* cur = _root->first_child; cur; )
  4943. {
  4944. xml_node_struct* next = cur->next_sibling;
  4945. impl::destroy_node(cur, alloc);
  4946. cur = next;
  4947. }
  4948. _root->first_child = 0;
  4949. return true;
  4950. }
  4951. PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
  4952. {
  4953. // append_buffer is only valid for elements/documents
  4954. if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
  4955. // get document node
  4956. impl::xml_document_struct* doc = &impl::get_document(_root);
  4957. // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
  4958. doc->header |= impl::xml_memory_page_contents_shared_mask;
  4959. // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
  4960. impl::xml_memory_page* page = 0;
  4961. impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page));
  4962. (void)page;
  4963. if (!extra) return impl::make_parse_result(status_out_of_memory);
  4964. #ifdef PUGIXML_COMPACT
  4965. // align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned
  4966. // note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account
  4967. extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1));
  4968. #endif
  4969. // add extra buffer to the list
  4970. extra->buffer = 0;
  4971. extra->next = doc->extra_buffers;
  4972. doc->extra_buffers = extra;
  4973. // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
  4974. impl::name_null_sentry sentry(_root);
  4975. return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
  4976. }
  4977. PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
  4978. {
  4979. if (!_root) return xml_node();
  4980. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  4981. {
  4982. const char_t* iname = i->name;
  4983. if (iname && impl::strequal(name_, iname))
  4984. {
  4985. for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
  4986. {
  4987. const char_t* aname = a->name;
  4988. if (aname && impl::strequal(attr_name, aname))
  4989. {
  4990. const char_t* avalue = a->value;
  4991. if (impl::strequal(attr_value, avalue ? avalue : PUGIXML_TEXT("")))
  4992. return xml_node(i);
  4993. }
  4994. }
  4995. }
  4996. }
  4997. return xml_node();
  4998. }
  4999. PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
  5000. {
  5001. if (!_root) return xml_node();
  5002. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  5003. for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
  5004. {
  5005. const char_t* aname = a->name;
  5006. if (aname && impl::strequal(attr_name, aname))
  5007. {
  5008. const char_t* avalue = a->value;
  5009. if (impl::strequal(attr_value, avalue ? avalue : PUGIXML_TEXT("")))
  5010. return xml_node(i);
  5011. }
  5012. }
  5013. return xml_node();
  5014. }
  5015. #ifndef PUGIXML_NO_STL
  5016. PUGI__FN string_t xml_node::path(char_t delimiter) const
  5017. {
  5018. if (!_root) return string_t();
  5019. size_t offset = 0;
  5020. for (xml_node_struct* i = _root; i; i = i->parent)
  5021. {
  5022. const char_t* iname = i->name;
  5023. offset += (i != _root);
  5024. offset += iname ? impl::strlength(iname) : 0;
  5025. }
  5026. string_t result;
  5027. result.resize(offset);
  5028. for (xml_node_struct* j = _root; j; j = j->parent)
  5029. {
  5030. if (j != _root)
  5031. result[--offset] = delimiter;
  5032. const char_t* jname = j->name;
  5033. if (jname)
  5034. {
  5035. size_t length = impl::strlength(jname);
  5036. offset -= length;
  5037. memcpy(&result[offset], jname, length * sizeof(char_t));
  5038. }
  5039. }
  5040. assert(offset == 0);
  5041. return result;
  5042. }
  5043. #endif
  5044. PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
  5045. {
  5046. xml_node context = path_[0] == delimiter ? root() : *this;
  5047. if (!context._root) return xml_node();
  5048. const char_t* path_segment = path_;
  5049. while (*path_segment == delimiter) ++path_segment;
  5050. const char_t* path_segment_end = path_segment;
  5051. while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
  5052. if (path_segment == path_segment_end) return context;
  5053. const char_t* next_segment = path_segment_end;
  5054. while (*next_segment == delimiter) ++next_segment;
  5055. if (*path_segment == '.' && path_segment + 1 == path_segment_end)
  5056. return context.first_element_by_path(next_segment, delimiter);
  5057. else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
  5058. return context.parent().first_element_by_path(next_segment, delimiter);
  5059. else
  5060. {
  5061. for (xml_node_struct* j = context._root->first_child; j; j = j->next_sibling)
  5062. {
  5063. const char_t* jname = j->name;
  5064. if (jname && impl::strequalrange(jname, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
  5065. {
  5066. xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
  5067. if (subsearch) return subsearch;
  5068. }
  5069. }
  5070. return xml_node();
  5071. }
  5072. }
  5073. PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
  5074. {
  5075. walker._depth = -1;
  5076. xml_node arg_begin(_root);
  5077. if (!walker.begin(arg_begin)) return false;
  5078. xml_node_struct* cur = _root ? _root->first_child + 0 : 0;
  5079. if (cur)
  5080. {
  5081. ++walker._depth;
  5082. do
  5083. {
  5084. xml_node arg_for_each(cur);
  5085. if (!walker.for_each(arg_for_each))
  5086. return false;
  5087. if (cur->first_child)
  5088. {
  5089. ++walker._depth;
  5090. cur = cur->first_child;
  5091. }
  5092. else if (cur->next_sibling)
  5093. cur = cur->next_sibling;
  5094. else
  5095. {
  5096. while (!cur->next_sibling && cur != _root && cur->parent)
  5097. {
  5098. --walker._depth;
  5099. cur = cur->parent;
  5100. }
  5101. if (cur != _root)
  5102. cur = cur->next_sibling;
  5103. }
  5104. }
  5105. while (cur && cur != _root);
  5106. }
  5107. assert(walker._depth == -1);
  5108. xml_node arg_end(_root);
  5109. return walker.end(arg_end);
  5110. }
  5111. PUGI__FN size_t xml_node::hash_value() const
  5112. {
  5113. return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
  5114. }
  5115. PUGI__FN xml_node_struct* xml_node::internal_object() const
  5116. {
  5117. return _root;
  5118. }
  5119. PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
  5120. {
  5121. if (!_root) return;
  5122. impl::xml_buffered_writer buffered_writer(writer, encoding);
  5123. impl::node_output(buffered_writer, _root, indent, flags, depth);
  5124. buffered_writer.flush();
  5125. }
  5126. #ifndef PUGIXML_NO_STL
  5127. PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
  5128. {
  5129. xml_writer_stream writer(stream);
  5130. print(writer, indent, flags, encoding, depth);
  5131. }
  5132. PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
  5133. {
  5134. xml_writer_stream writer(stream);
  5135. print(writer, indent, flags, encoding_wchar, depth);
  5136. }
  5137. #endif
  5138. PUGI__FN ptrdiff_t xml_node::offset_debug() const
  5139. {
  5140. if (!_root) return -1;
  5141. impl::xml_document_struct& doc = impl::get_document(_root);
  5142. // we can determine the offset reliably only if there is exactly once parse buffer
  5143. if (!doc.buffer || doc.extra_buffers) return -1;
  5144. switch (type())
  5145. {
  5146. case node_document:
  5147. return 0;
  5148. case node_element:
  5149. case node_declaration:
  5150. case node_pi:
  5151. return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
  5152. case node_pcdata:
  5153. case node_cdata:
  5154. case node_comment:
  5155. case node_doctype:
  5156. return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
  5157. default:
  5158. assert(false && "Invalid node type"); // unreachable
  5159. return -1;
  5160. }
  5161. }
  5162. #ifdef __BORLANDC__
  5163. PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
  5164. {
  5165. return (bool)lhs && rhs;
  5166. }
  5167. PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
  5168. {
  5169. return (bool)lhs || rhs;
  5170. }
  5171. #endif
  5172. PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
  5173. {
  5174. }
  5175. PUGI__FN xml_node_struct* xml_text::_data() const
  5176. {
  5177. if (!_root || impl::is_text_node(_root)) return _root;
  5178. // element nodes can have value if parse_embed_pcdata was used
  5179. if (PUGI__NODETYPE(_root) == node_element && _root->value)
  5180. return _root;
  5181. for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
  5182. if (impl::is_text_node(node))
  5183. return node;
  5184. return 0;
  5185. }
  5186. PUGI__FN xml_node_struct* xml_text::_data_new()
  5187. {
  5188. xml_node_struct* d = _data();
  5189. if (d) return d;
  5190. return xml_node(_root).append_child(node_pcdata).internal_object();
  5191. }
  5192. PUGI__FN xml_text::xml_text(): _root(0)
  5193. {
  5194. }
  5195. PUGI__FN static void unspecified_bool_xml_text(xml_text***)
  5196. {
  5197. }
  5198. PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
  5199. {
  5200. return _data() ? unspecified_bool_xml_text : 0;
  5201. }
  5202. PUGI__FN bool xml_text::operator!() const
  5203. {
  5204. return !_data();
  5205. }
  5206. PUGI__FN bool xml_text::empty() const
  5207. {
  5208. return _data() == 0;
  5209. }
  5210. PUGI__FN const char_t* xml_text::get() const
  5211. {
  5212. xml_node_struct* d = _data();
  5213. if (!d) return PUGIXML_TEXT("");
  5214. const char_t* value = d->value;
  5215. return value ? value : PUGIXML_TEXT("");
  5216. }
  5217. PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
  5218. {
  5219. xml_node_struct* d = _data();
  5220. if (!d) return def;
  5221. const char_t* value = d->value;
  5222. return value ? value : def;
  5223. }
  5224. PUGI__FN int xml_text::as_int(int def) const
  5225. {
  5226. xml_node_struct* d = _data();
  5227. if (!d) return def;
  5228. const char_t* value = d->value;
  5229. return value ? impl::get_value_int(value) : def;
  5230. }
  5231. PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
  5232. {
  5233. xml_node_struct* d = _data();
  5234. if (!d) return def;
  5235. const char_t* value = d->value;
  5236. return value ? impl::get_value_uint(value) : def;
  5237. }
  5238. PUGI__FN double xml_text::as_double(double def) const
  5239. {
  5240. xml_node_struct* d = _data();
  5241. if (!d) return def;
  5242. const char_t* value = d->value;
  5243. return value ? impl::get_value_double(value) : def;
  5244. }
  5245. PUGI__FN float xml_text::as_float(float def) const
  5246. {
  5247. xml_node_struct* d = _data();
  5248. if (!d) return def;
  5249. const char_t* value = d->value;
  5250. return value ? impl::get_value_float(value) : def;
  5251. }
  5252. PUGI__FN bool xml_text::as_bool(bool def) const
  5253. {
  5254. xml_node_struct* d = _data();
  5255. if (!d) return def;
  5256. const char_t* value = d->value;
  5257. return value ? impl::get_value_bool(value) : def;
  5258. }
  5259. #ifdef PUGIXML_HAS_LONG_LONG
  5260. PUGI__FN long long xml_text::as_llong(long long def) const
  5261. {
  5262. xml_node_struct* d = _data();
  5263. if (!d) return def;
  5264. const char_t* value = d->value;
  5265. return value ? impl::get_value_llong(value) : def;
  5266. }
  5267. PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
  5268. {
  5269. xml_node_struct* d = _data();
  5270. if (!d) return def;
  5271. const char_t* value = d->value;
  5272. return value ? impl::get_value_ullong(value) : def;
  5273. }
  5274. #endif
  5275. PUGI__FN bool xml_text::set(const char_t* rhs, size_t sz)
  5276. {
  5277. xml_node_struct* dn = _data_new();
  5278. return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, sz) : false;
  5279. }
  5280. PUGI__FN bool xml_text::set(const char_t* rhs)
  5281. {
  5282. xml_node_struct* dn = _data_new();
  5283. return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
  5284. }
  5285. PUGI__FN bool xml_text::set(int rhs)
  5286. {
  5287. xml_node_struct* dn = _data_new();
  5288. return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
  5289. }
  5290. PUGI__FN bool xml_text::set(unsigned int rhs)
  5291. {
  5292. xml_node_struct* dn = _data_new();
  5293. return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
  5294. }
  5295. PUGI__FN bool xml_text::set(long rhs)
  5296. {
  5297. xml_node_struct* dn = _data_new();
  5298. return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
  5299. }
  5300. PUGI__FN bool xml_text::set(unsigned long rhs)
  5301. {
  5302. xml_node_struct* dn = _data_new();
  5303. return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
  5304. }
  5305. PUGI__FN bool xml_text::set(float rhs)
  5306. {
  5307. xml_node_struct* dn = _data_new();
  5308. return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision) : false;
  5309. }
  5310. PUGI__FN bool xml_text::set(float rhs, int precision)
  5311. {
  5312. xml_node_struct* dn = _data_new();
  5313. return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
  5314. }
  5315. PUGI__FN bool xml_text::set(double rhs)
  5316. {
  5317. xml_node_struct* dn = _data_new();
  5318. return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision) : false;
  5319. }
  5320. PUGI__FN bool xml_text::set(double rhs, int precision)
  5321. {
  5322. xml_node_struct* dn = _data_new();
  5323. return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
  5324. }
  5325. PUGI__FN bool xml_text::set(bool rhs)
  5326. {
  5327. xml_node_struct* dn = _data_new();
  5328. return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
  5329. }
  5330. #ifdef PUGIXML_HAS_LONG_LONG
  5331. PUGI__FN bool xml_text::set(long long rhs)
  5332. {
  5333. xml_node_struct* dn = _data_new();
  5334. return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
  5335. }
  5336. PUGI__FN bool xml_text::set(unsigned long long rhs)
  5337. {
  5338. xml_node_struct* dn = _data_new();
  5339. return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
  5340. }
  5341. #endif
  5342. PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
  5343. {
  5344. set(rhs);
  5345. return *this;
  5346. }
  5347. PUGI__FN xml_text& xml_text::operator=(int rhs)
  5348. {
  5349. set(rhs);
  5350. return *this;
  5351. }
  5352. PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
  5353. {
  5354. set(rhs);
  5355. return *this;
  5356. }
  5357. PUGI__FN xml_text& xml_text::operator=(long rhs)
  5358. {
  5359. set(rhs);
  5360. return *this;
  5361. }
  5362. PUGI__FN xml_text& xml_text::operator=(unsigned long rhs)
  5363. {
  5364. set(rhs);
  5365. return *this;
  5366. }
  5367. PUGI__FN xml_text& xml_text::operator=(double rhs)
  5368. {
  5369. set(rhs);
  5370. return *this;
  5371. }
  5372. PUGI__FN xml_text& xml_text::operator=(float rhs)
  5373. {
  5374. set(rhs);
  5375. return *this;
  5376. }
  5377. PUGI__FN xml_text& xml_text::operator=(bool rhs)
  5378. {
  5379. set(rhs);
  5380. return *this;
  5381. }
  5382. #ifdef PUGIXML_HAS_LONG_LONG
  5383. PUGI__FN xml_text& xml_text::operator=(long long rhs)
  5384. {
  5385. set(rhs);
  5386. return *this;
  5387. }
  5388. PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
  5389. {
  5390. set(rhs);
  5391. return *this;
  5392. }
  5393. #endif
  5394. PUGI__FN xml_node xml_text::data() const
  5395. {
  5396. return xml_node(_data());
  5397. }
  5398. #ifdef __BORLANDC__
  5399. PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
  5400. {
  5401. return (bool)lhs && rhs;
  5402. }
  5403. PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
  5404. {
  5405. return (bool)lhs || rhs;
  5406. }
  5407. #endif
  5408. PUGI__FN xml_node_iterator::xml_node_iterator()
  5409. {
  5410. }
  5411. PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
  5412. {
  5413. }
  5414. PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
  5415. {
  5416. }
  5417. PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
  5418. {
  5419. return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
  5420. }
  5421. PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
  5422. {
  5423. return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
  5424. }
  5425. PUGI__FN xml_node& xml_node_iterator::operator*() const
  5426. {
  5427. assert(_wrap._root);
  5428. return _wrap;
  5429. }
  5430. PUGI__FN xml_node* xml_node_iterator::operator->() const
  5431. {
  5432. assert(_wrap._root);
  5433. return const_cast<xml_node*>(&_wrap); // BCC5 workaround
  5434. }
  5435. PUGI__FN xml_node_iterator& xml_node_iterator::operator++()
  5436. {
  5437. assert(_wrap._root);
  5438. _wrap._root = _wrap._root->next_sibling;
  5439. return *this;
  5440. }
  5441. PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
  5442. {
  5443. xml_node_iterator temp = *this;
  5444. ++*this;
  5445. return temp;
  5446. }
  5447. PUGI__FN xml_node_iterator& xml_node_iterator::operator--()
  5448. {
  5449. _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
  5450. return *this;
  5451. }
  5452. PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
  5453. {
  5454. xml_node_iterator temp = *this;
  5455. --*this;
  5456. return temp;
  5457. }
  5458. PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
  5459. {
  5460. }
  5461. PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
  5462. {
  5463. }
  5464. PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
  5465. {
  5466. }
  5467. PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
  5468. {
  5469. return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
  5470. }
  5471. PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
  5472. {
  5473. return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
  5474. }
  5475. PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
  5476. {
  5477. assert(_wrap._attr);
  5478. return _wrap;
  5479. }
  5480. PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
  5481. {
  5482. assert(_wrap._attr);
  5483. return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
  5484. }
  5485. PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator++()
  5486. {
  5487. assert(_wrap._attr);
  5488. _wrap._attr = _wrap._attr->next_attribute;
  5489. return *this;
  5490. }
  5491. PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
  5492. {
  5493. xml_attribute_iterator temp = *this;
  5494. ++*this;
  5495. return temp;
  5496. }
  5497. PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator--()
  5498. {
  5499. _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
  5500. return *this;
  5501. }
  5502. PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
  5503. {
  5504. xml_attribute_iterator temp = *this;
  5505. --*this;
  5506. return temp;
  5507. }
  5508. PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
  5509. {
  5510. }
  5511. PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
  5512. {
  5513. }
  5514. PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
  5515. {
  5516. }
  5517. PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
  5518. {
  5519. return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
  5520. }
  5521. PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
  5522. {
  5523. return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
  5524. }
  5525. PUGI__FN xml_node& xml_named_node_iterator::operator*() const
  5526. {
  5527. assert(_wrap._root);
  5528. return _wrap;
  5529. }
  5530. PUGI__FN xml_node* xml_named_node_iterator::operator->() const
  5531. {
  5532. assert(_wrap._root);
  5533. return const_cast<xml_node*>(&_wrap); // BCC5 workaround
  5534. }
  5535. PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator++()
  5536. {
  5537. assert(_wrap._root);
  5538. _wrap = _wrap.next_sibling(_name);
  5539. return *this;
  5540. }
  5541. PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
  5542. {
  5543. xml_named_node_iterator temp = *this;
  5544. ++*this;
  5545. return temp;
  5546. }
  5547. PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator--()
  5548. {
  5549. if (_wrap._root)
  5550. _wrap = _wrap.previous_sibling(_name);
  5551. else
  5552. {
  5553. _wrap = _parent.last_child();
  5554. if (!impl::strequal(_wrap.name(), _name))
  5555. _wrap = _wrap.previous_sibling(_name);
  5556. }
  5557. return *this;
  5558. }
  5559. PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
  5560. {
  5561. xml_named_node_iterator temp = *this;
  5562. --*this;
  5563. return temp;
  5564. }
  5565. PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
  5566. {
  5567. }
  5568. PUGI__FN xml_parse_result::operator bool() const
  5569. {
  5570. return status == status_ok;
  5571. }
  5572. PUGI__FN const char* xml_parse_result::description() const
  5573. {
  5574. switch (status)
  5575. {
  5576. case status_ok: return "No error";
  5577. case status_file_not_found: return "File was not found";
  5578. case status_io_error: return "Error reading from file/stream";
  5579. case status_out_of_memory: return "Could not allocate memory";
  5580. case status_internal_error: return "Internal error occurred";
  5581. case status_unrecognized_tag: return "Could not determine tag type";
  5582. case status_bad_pi: return "Error parsing document declaration/processing instruction";
  5583. case status_bad_comment: return "Error parsing comment";
  5584. case status_bad_cdata: return "Error parsing CDATA section";
  5585. case status_bad_doctype: return "Error parsing document type declaration";
  5586. case status_bad_pcdata: return "Error parsing PCDATA section";
  5587. case status_bad_start_element: return "Error parsing start element tag";
  5588. case status_bad_attribute: return "Error parsing element attribute";
  5589. case status_bad_end_element: return "Error parsing end element tag";
  5590. case status_end_element_mismatch: return "Start-end tags mismatch";
  5591. case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
  5592. case status_no_document_element: return "No document element found";
  5593. default: return "Unknown error";
  5594. }
  5595. }
  5596. PUGI__FN xml_document::xml_document(): _buffer(0)
  5597. {
  5598. _create();
  5599. }
  5600. PUGI__FN xml_document::~xml_document()
  5601. {
  5602. _destroy();
  5603. }
  5604. #ifdef PUGIXML_HAS_MOVE
  5605. PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0)
  5606. {
  5607. _create();
  5608. _move(rhs);
  5609. }
  5610. PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
  5611. {
  5612. if (this == &rhs) return *this;
  5613. _destroy();
  5614. _create();
  5615. _move(rhs);
  5616. return *this;
  5617. }
  5618. #endif
  5619. PUGI__FN void xml_document::reset()
  5620. {
  5621. _destroy();
  5622. _create();
  5623. }
  5624. PUGI__FN void xml_document::reset(const xml_document& proto)
  5625. {
  5626. reset();
  5627. impl::node_copy_tree(_root, proto._root);
  5628. }
  5629. PUGI__FN void xml_document::_create()
  5630. {
  5631. assert(!_root);
  5632. #ifdef PUGIXML_COMPACT
  5633. // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit
  5634. const size_t page_offset = sizeof(void*);
  5635. #else
  5636. const size_t page_offset = 0;
  5637. #endif
  5638. // initialize sentinel page
  5639. PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));
  5640. // prepare page structure
  5641. impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
  5642. assert(page);
  5643. page->busy_size = impl::xml_memory_page_size;
  5644. // setup first page marker
  5645. #ifdef PUGIXML_COMPACT
  5646. // round-trip through void* to avoid 'cast increases required alignment of target type' warning
  5647. page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
  5648. *page->compact_page_marker = sizeof(impl::xml_memory_page);
  5649. #endif
  5650. // allocate new root
  5651. _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
  5652. _root->prev_sibling_c = _root;
  5653. // setup sentinel page
  5654. page->allocator = static_cast<impl::xml_document_struct*>(_root);
  5655. // setup hash table pointer in allocator
  5656. #ifdef PUGIXML_COMPACT
  5657. page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash;
  5658. #endif
  5659. // verify the document allocation
  5660. assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
  5661. }
  5662. PUGI__FN void xml_document::_destroy()
  5663. {
  5664. assert(_root);
  5665. // destroy static storage
  5666. if (_buffer)
  5667. {
  5668. impl::xml_memory::deallocate(_buffer);
  5669. _buffer = 0;
  5670. }
  5671. // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
  5672. for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
  5673. {
  5674. if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
  5675. }
  5676. // destroy dynamic storage, leave sentinel page (it's in static memory)
  5677. impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
  5678. assert(root_page && !root_page->prev);
  5679. assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
  5680. for (impl::xml_memory_page* page = root_page->next; page; )
  5681. {
  5682. impl::xml_memory_page* next = page->next;
  5683. impl::xml_allocator::deallocate_page(page);
  5684. page = next;
  5685. }
  5686. #ifdef PUGIXML_COMPACT
  5687. // destroy hash table
  5688. static_cast<impl::xml_document_struct*>(_root)->hash.clear();
  5689. #endif
  5690. _root = 0;
  5691. }
  5692. #ifdef PUGIXML_HAS_MOVE
  5693. PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
  5694. {
  5695. impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
  5696. impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root);
  5697. // save first child pointer for later; this needs hash access
  5698. xml_node_struct* other_first_child = other->first_child;
  5699. #ifdef PUGIXML_COMPACT
  5700. // reserve space for the hash table up front; this is the only operation that can fail
  5701. // if it does, we have no choice but to throw (if we have exceptions)
  5702. if (other_first_child)
  5703. {
  5704. size_t other_children = 0;
  5705. for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
  5706. other_children++;
  5707. // in compact mode, each pointer assignment could result in a hash table request
  5708. // during move, we have to relocate document first_child and parents of all children
  5709. // normally there's just one child and its parent has a pointerless encoding but
  5710. // we assume the worst here
  5711. if (!other->_hash->reserve(other_children + 1))
  5712. {
  5713. #ifdef PUGIXML_NO_EXCEPTIONS
  5714. return;
  5715. #else
  5716. throw std::bad_alloc();
  5717. #endif
  5718. }
  5719. }
  5720. #endif
  5721. // move allocation state
  5722. // note that other->_root may point to the embedded document page, in which case we should keep original (empty) state
  5723. if (other->_root != PUGI__GETPAGE(other))
  5724. {
  5725. doc->_root = other->_root;
  5726. doc->_busy_size = other->_busy_size;
  5727. }
  5728. // move buffer state
  5729. doc->buffer = other->buffer;
  5730. doc->extra_buffers = other->extra_buffers;
  5731. _buffer = rhs._buffer;
  5732. #ifdef PUGIXML_COMPACT
  5733. // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child
  5734. doc->hash = other->hash;
  5735. doc->_hash = &doc->hash;
  5736. // make sure we don't access other hash up until the end when we reinitialize other document
  5737. other->_hash = 0;
  5738. #endif
  5739. // move page structure
  5740. impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc);
  5741. assert(doc_page && !doc_page->prev && !doc_page->next);
  5742. impl::xml_memory_page* other_page = PUGI__GETPAGE(other);
  5743. assert(other_page && !other_page->prev);
  5744. // relink pages since root page is embedded into xml_document
  5745. if (impl::xml_memory_page* page = other_page->next)
  5746. {
  5747. assert(page->prev == other_page);
  5748. page->prev = doc_page;
  5749. doc_page->next = page;
  5750. other_page->next = 0;
  5751. }
  5752. // make sure pages point to the correct document state
  5753. for (impl::xml_memory_page* page = doc_page->next; page; page = page->next)
  5754. {
  5755. assert(page->allocator == other);
  5756. page->allocator = doc;
  5757. #ifdef PUGIXML_COMPACT
  5758. // this automatically migrates most children between documents and prevents ->parent assignment from allocating
  5759. if (page->compact_shared_parent == other)
  5760. page->compact_shared_parent = doc;
  5761. #endif
  5762. }
  5763. // move tree structure
  5764. assert(!doc->first_child);
  5765. doc->first_child = other_first_child;
  5766. for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
  5767. {
  5768. #ifdef PUGIXML_COMPACT
  5769. // most children will have migrated when we reassigned compact_shared_parent
  5770. assert(node->parent == other || node->parent == doc);
  5771. node->parent = doc;
  5772. #else
  5773. assert(node->parent == other);
  5774. node->parent = doc;
  5775. #endif
  5776. }
  5777. // reset other document
  5778. new (other) impl::xml_document_struct(PUGI__GETPAGE(other));
  5779. rhs._buffer = 0;
  5780. }
  5781. #endif
  5782. #ifndef PUGIXML_NO_STL
  5783. PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
  5784. {
  5785. reset();
  5786. return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
  5787. }
  5788. PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
  5789. {
  5790. reset();
  5791. return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
  5792. }
  5793. #endif
  5794. PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
  5795. {
  5796. // Force native encoding (skip autodetection)
  5797. #ifdef PUGIXML_WCHAR_MODE
  5798. xml_encoding encoding = encoding_wchar;
  5799. #else
  5800. xml_encoding encoding = encoding_utf8;
  5801. #endif
  5802. return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
  5803. }
  5804. PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
  5805. {
  5806. return load_string(contents, options);
  5807. }
  5808. PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
  5809. {
  5810. reset();
  5811. using impl::auto_deleter; // MSVC7 workaround
  5812. auto_deleter<FILE> file(impl::open_file(path_, "rb"), impl::close_file);
  5813. return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
  5814. }
  5815. PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
  5816. {
  5817. reset();
  5818. using impl::auto_deleter; // MSVC7 workaround
  5819. auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
  5820. return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
  5821. }
  5822. PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
  5823. {
  5824. reset();
  5825. return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
  5826. }
  5827. PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
  5828. {
  5829. reset();
  5830. return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
  5831. }
  5832. PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
  5833. {
  5834. reset();
  5835. return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
  5836. }
  5837. PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
  5838. {
  5839. impl::xml_buffered_writer buffered_writer(writer, encoding);
  5840. if ((flags & format_write_bom) && encoding != encoding_latin1)
  5841. {
  5842. // BOM always represents the codepoint U+FEFF, so just write it in native encoding
  5843. #ifdef PUGIXML_WCHAR_MODE
  5844. unsigned int bom = 0xfeff;
  5845. buffered_writer.write(static_cast<wchar_t>(bom));
  5846. #else
  5847. buffered_writer.write('\xef', '\xbb', '\xbf');
  5848. #endif
  5849. }
  5850. if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
  5851. {
  5852. buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
  5853. if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
  5854. buffered_writer.write('?', '>');
  5855. if (!(flags & format_raw)) buffered_writer.write('\n');
  5856. }
  5857. impl::node_output(buffered_writer, _root, indent, flags, 0);
  5858. buffered_writer.flush();
  5859. }
  5860. #ifndef PUGIXML_NO_STL
  5861. PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
  5862. {
  5863. xml_writer_stream writer(stream);
  5864. save(writer, indent, flags, encoding);
  5865. }
  5866. PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
  5867. {
  5868. xml_writer_stream writer(stream);
  5869. save(writer, indent, flags, encoding_wchar);
  5870. }
  5871. #endif
  5872. PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
  5873. {
  5874. using impl::auto_deleter; // MSVC7 workaround
  5875. auto_deleter<FILE> file(impl::open_file(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
  5876. return impl::save_file_impl(*this, file.data, indent, flags, encoding) && fclose(file.release()) == 0;
  5877. }
  5878. PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
  5879. {
  5880. using impl::auto_deleter; // MSVC7 workaround
  5881. auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file);
  5882. return impl::save_file_impl(*this, file.data, indent, flags, encoding) && fclose(file.release()) == 0;
  5883. }
  5884. PUGI__FN xml_node xml_document::document_element() const
  5885. {
  5886. assert(_root);
  5887. for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
  5888. if (PUGI__NODETYPE(i) == node_element)
  5889. return xml_node(i);
  5890. return xml_node();
  5891. }
  5892. #ifndef PUGIXML_NO_STL
  5893. PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
  5894. {
  5895. assert(str);
  5896. return impl::as_utf8_impl(str, impl::strlength_wide(str));
  5897. }
  5898. PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
  5899. {
  5900. return impl::as_utf8_impl(str.c_str(), str.size());
  5901. }
  5902. PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
  5903. {
  5904. assert(str);
  5905. return impl::as_wide_impl(str, strlen(str));
  5906. }
  5907. PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
  5908. {
  5909. return impl::as_wide_impl(str.c_str(), str.size());
  5910. }
  5911. #endif
  5912. PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
  5913. {
  5914. impl::xml_memory::allocate = allocate;
  5915. impl::xml_memory::deallocate = deallocate;
  5916. }
  5917. PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
  5918. {
  5919. return impl::xml_memory::allocate;
  5920. }
  5921. PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
  5922. {
  5923. return impl::xml_memory::deallocate;
  5924. }
  5925. }
  5926. #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
  5927. namespace std
  5928. {
  5929. // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
  5930. PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
  5931. {
  5932. return std::bidirectional_iterator_tag();
  5933. }
  5934. PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
  5935. {
  5936. return std::bidirectional_iterator_tag();
  5937. }
  5938. PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
  5939. {
  5940. return std::bidirectional_iterator_tag();
  5941. }
  5942. }
  5943. #endif
  5944. #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
  5945. namespace std
  5946. {
  5947. // Workarounds for (non-standard) iterator category detection
  5948. PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
  5949. {
  5950. return std::bidirectional_iterator_tag();
  5951. }
  5952. PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
  5953. {
  5954. return std::bidirectional_iterator_tag();
  5955. }
  5956. PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
  5957. {
  5958. return std::bidirectional_iterator_tag();
  5959. }
  5960. }
  5961. #endif
  5962. #ifndef PUGIXML_NO_XPATH
  5963. // STL replacements
  5964. PUGI__NS_BEGIN
  // Function object: true when both operands compare equal via operator==.
  struct equal_to
  {
      template <typename T> bool operator()(const T& lhs, const T& rhs) const
      {
          const bool same = (lhs == rhs);
          return same;
      }
  };
  // Function object: true when the operands differ according to operator!=.
  struct not_equal_to
  {
      template <typename T> bool operator()(const T& lhs, const T& rhs) const
      {
          const bool differs = (lhs != rhs);
          return differs;
      }
  };
  // Function object: true when lhs orders strictly before rhs via operator<.
  struct less
  {
      template <typename T> bool operator()(const T& lhs, const T& rhs) const
      {
          const bool before = (lhs < rhs);
          return before;
      }
  };
  // Function object: true when lhs orders before or equal to rhs via operator<=.
  struct less_equal
  {
      template <typename T> bool operator()(const T& lhs, const T& rhs) const
      {
          const bool not_after = (lhs <= rhs);
          return not_after;
      }
  };
  // Exchanges the values of lhs and rhs through a temporary copy.
  template <typename T> inline void swap(T& lhs, T& rhs)
  {
      T saved = lhs; // keep the old left value while it gets overwritten
      lhs = rhs;
      rhs = saved;
  }
  5999. template <typename I, typename Pred> PUGI__FN I min_element(I begin, I end, const Pred& pred)
  6000. {
  6001. I result = begin;
  6002. for (I it = begin + 1; it != end; ++it)
  6003. if (pred(*it, *result))
  6004. result = it;
  6005. return result;
  6006. }
  6007. template <typename I> PUGI__FN void reverse(I begin, I end)
  6008. {
  6009. while (end - begin > 1)
  6010. swap(*begin++, *--end);
  6011. }
  6012. template <typename I> PUGI__FN I unique(I begin, I end)
  6013. {
  6014. // fast skip head
  6015. while (end - begin > 1 && *begin != *(begin + 1))
  6016. begin++;
  6017. if (begin == end)
  6018. return begin;
  6019. // last written element
  6020. I write = begin++;
  6021. // merge unique elements
  6022. while (begin != end)
  6023. {
  6024. if (*begin != *write)
  6025. *++write = *begin++;
  6026. else
  6027. begin++;
  6028. }
  6029. // past-the-end (write points to live element)
  6030. return write + 1;
  6031. }
  6032. template <typename T, typename Pred> PUGI__FN void insertion_sort(T* begin, T* end, const Pred& pred)
  6033. {
  6034. if (begin == end)
  6035. return;
  6036. for (T* it = begin + 1; it != end; ++it)
  6037. {
  6038. T val = *it;
  6039. T* hole = it;
  6040. // move hole backwards
  6041. while (hole > begin && pred(val, *(hole - 1)))
  6042. {
  6043. *hole = *(hole - 1);
  6044. hole--;
  6045. }
  6046. // fill hole with element
  6047. *hole = val;
  6048. }
  6049. }
  // Returns whichever of the three iterators refers to the median element under pred.
  // Only the local iterator copies are exchanged; the referenced elements are not moved.
  template <typename I, typename Pred> inline I median3(I first, I middle, I last, const Pred& pred)
  {
      // order (first, middle)
      if (pred(*middle, *first))
      {
          I held = middle;
          middle = first;
          first = held;
      }

      // order (middle, last)
      if (pred(*last, *middle))
      {
          I held = last;
          last = middle;
          middle = held;
      }

      // the previous step may have pulled a smaller element into middle
      if (pred(*middle, *first))
      {
          I held = middle;
          middle = first;
          first = held;
      }

      return middle;
  }
  // Three-way partition of [begin, end) around pivot.
  // On return the range is laid out as  < pivot | == pivot | everything else,
  // with *out_eqbeg / *out_eqend delimiting the middle (equal) group.
  // Elements that are neither pred-less than nor == pivot land in the last group.
  template <typename T, typename Pred> PUGI__FN void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend)
  {
      // invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups)
      //   [begin, eq) equal, [eq, lt) less, [lt, gt) not yet examined, [gt, end) greater
      T* eq = begin;
      T* lt = begin;
      T* gt = end;
      while (lt < gt)
      {
          if (pred(*lt, pivot))
              lt++; // already in the "less" group, just grow it
          else if (*lt == pivot)
              swap(*eq++, *lt++); // park equal elements at the front for now
          else
              swap(*lt, *--gt); // move to the "greater" group; the swapped-in element is examined next
      }
      // we now have just 3 groups: = < >; move equal elements to the middle
      T* eqbeg = gt;
      for (T* it = begin; it != eq; ++it)
          swap(*it, *--eqbeg);
      *out_eqbeg = eqbeg;
      *out_eqend = gt;
  }
  6082. template <typename I, typename Pred> PUGI__FN void sort(I begin, I end, const Pred& pred)
  6083. {
  6084. // sort large chunks
  6085. while (end - begin > 16)
  6086. {
  6087. // find median element
  6088. I middle = begin + (end - begin) / 2;
  6089. I median = median3(begin, middle, end - 1, pred);
  6090. // partition in three chunks (< = >)
  6091. I eqbeg, eqend;
  6092. partition3(begin, end, *median, pred, &eqbeg, &eqend);
  6093. // loop on larger half
  6094. if (eqbeg - begin > end - eqend)
  6095. {
  6096. sort(eqend, end, pred);
  6097. end = eqbeg;
  6098. }
  6099. else
  6100. {
  6101. sort(begin, eqbeg, pred);
  6102. begin = eqend;
  6103. }
  6104. }
  6105. // insertion sort small chunk
  6106. insertion_sort(begin, end, pred);
  6107. }
  6108. PUGI__FN bool hash_insert(const void** table, size_t size, const void* key)
  6109. {
  6110. assert(key);
  6111. unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
  6112. // MurmurHash3 32-bit finalizer
  6113. h ^= h >> 16;
  6114. h *= 0x85ebca6bu;
  6115. h ^= h >> 13;
  6116. h *= 0xc2b2ae35u;
  6117. h ^= h >> 16;
  6118. size_t hashmod = size - 1;
  6119. size_t bucket = h & hashmod;
  6120. for (size_t probe = 0; probe <= hashmod; ++probe)
  6121. {
  6122. if (table[bucket] == 0)
  6123. {
  6124. table[bucket] = key;
  6125. return true;
  6126. }
  6127. if (table[bucket] == key)
  6128. return false;
  6129. // hash collision, quadratic probing
  6130. bucket = (bucket + probe + 1) & hashmod;
  6131. }
  6132. assert(false && "Hash table is full"); // unreachable
  6133. return false;
  6134. }
  6135. PUGI__NS_END
  6136. // Allocator used for AST and evaluation stacks
  6137. PUGI__NS_BEGIN
  // Payload size (in bytes) of a regular XPath allocator page; can be overridden at
  // build time through PUGIXML_MEMORY_XPATH_PAGE_SIZE.
  #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
  static const size_t xpath_memory_page_size = PUGIXML_MEMORY_XPATH_PAGE_SIZE;
  #else
  static const size_t xpath_memory_page_size = 4096;
  #endif

  // Allocation granularity: the larger of sizeof(double) and sizeof(void*).
  static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*);
  // One page of XPath allocator memory: a small header followed by the payload bytes.
  // Pages are chained into a singly linked list through next.
  struct xpath_memory_block
  {
      xpath_memory_block* next; // following block in the chain
      size_t capacity; // number of payload bytes usable in data
      union
      {
          char data[xpath_memory_page_size]; // payload storage (declared size; see capacity for the usable size)
          double alignment; // not read in this part of the file; NOTE(review): presumably only here to align data for double
      };
  };
  // Bump allocator over a chain of xpath_memory_block pages.
  // _root is the newest page and _root_size the number of bytes already handed out from it.
  // Individual objects are never freed; memory is given back page by page through
  // revert() and release().
  struct xpath_allocator
  {
      xpath_memory_block* _root; // current (most recently added) page
      size_t _root_size; // bytes used in _root
      bool* _error; // optional out-of-memory flag, set to true when a page allocation fails
      // Starts allocating from the beginning of root; error may be null.
      xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error)
      {
      }
      // Returns size bytes (rounded up to xpath_memory_block_alignment), or 0 if a new
      // page was needed and could not be obtained (in which case *_error is set when available).
      void* allocate(size_t size)
      {
          // round size up to block alignment boundary
          size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
          if (_root_size + size <= _root->capacity)
          {
              // fits into the current page: bump the used size
              void* buf = &_root->data[0] + _root_size;
              _root_size += size;
              return buf;
          }
          else
          {
              // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
              size_t block_capacity_base = sizeof(_root->data);
              size_t block_capacity_req = size + block_capacity_base / 4;
              size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
              // header bytes in front of data plus the payload capacity
              size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
              xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
              if (!block)
              {
                  if (_error) *_error = true;
                  return 0;
              }
              // the new page becomes the head of the chain; the request occupies its start
              block->next = _root;
              block->capacity = block_capacity;
              _root = block;
              _root_size = size;
              return block->data;
          }
      }
      // Grows the most recently allocated object from old_size to new_size bytes.
      // ptr may be null, in which case this behaves like allocate(new_size).
      // Returns the (possibly moved) object, or 0 on allocation failure.
      void* reallocate(void* ptr, size_t old_size, size_t new_size)
      {
          // round size up to block alignment boundary
          old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
          new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
          // we can only reallocate the last object
          assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
          // try to reallocate the object inplace
          if (ptr && _root_size - old_size + new_size <= _root->capacity)
          {
              _root_size = _root_size - old_size + new_size;
              return ptr;
          }
          // allocate a new block
          void* result = allocate(new_size);
          if (!result) return 0;
          // we have a new block
          if (ptr)
          {
              // copy old data (we only support growing)
              assert(new_size >= old_size);
              memcpy(result, ptr, old_size);
              // free the previous page if it had no other objects
              assert(_root->data == result);
              assert(_root->next);
              if (_root->next->data == ptr)
              {
                  // deallocate the whole page, unless it was the first one
                  xpath_memory_block* next = _root->next->next;
                  if (next)
                  {
                      xml_memory::deallocate(_root->next);
                      _root->next = next;
                  }
              }
          }
          return result;
      }
      // Rolls the allocator back to a previously copied state: every page added since
      // then is deallocated and the saved root/size pair is restored.
      void revert(const xpath_allocator& state)
      {
          // free all new pages
          xpath_memory_block* cur = _root;
          while (cur != state._root)
          {
              xpath_memory_block* next = cur->next;
              xml_memory::deallocate(cur);
              cur = next;
          }
          // restore state
          _root = state._root;
          _root_size = state._root_size;
      }
      // Deallocates every page that has a successor; the last page of the chain is left
      // untouched. _root is not updated, so the allocator must not be used afterwards.
      void release()
      {
          xpath_memory_block* cur = _root;
          assert(cur);
          while (cur->next)
          {
              xpath_memory_block* next = cur->next;
              xml_memory::deallocate(cur);
              cur = next;
          }
      }
  };
  6258. struct xpath_allocator_capture
  6259. {
  6260. xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
  6261. {
  6262. }
  6263. ~xpath_allocator_capture()
  6264. {
  6265. _target->revert(_state);
  6266. }
  6267. xpath_allocator* _target;
  6268. xpath_allocator _state;
  6269. };
  // Pair of allocators handed around together. The struct only stores the pointers;
  // NOTE(review): judging by the names, result holds returned values and temp holds
  // scratch data - confirm against the evaluation code.
  struct xpath_stack
  {
      xpath_allocator* result;
      xpath_allocator* temp;
  };
  6275. struct xpath_stack_data
  6276. {
  6277. xpath_memory_block blocks[2];
  6278. xpath_allocator result;
  6279. xpath_allocator temp;
  6280. xpath_stack stack;
  6281. bool oom;
  6282. xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false)
  6283. {
  6284. blocks[0].next = blocks[1].next = 0;
  6285. blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
  6286. stack.result = &result;
  6287. stack.temp = &temp;
  6288. }
  6289. ~xpath_stack_data()
  6290. {
  6291. result.release();
  6292. temp.release();
  6293. }
  6294. };
  6295. PUGI__NS_END
  6296. // String class
  6297. PUGI__NS_BEGIN
  // String used by the XPath code. It either refers to a constant, externally owned
  // zero-terminated buffer (_uses_heap == false) or to a buffer obtained from an
  // xpath_allocator (_uses_heap == true), in which case the length is cached.
  class xpath_string
  {
      const char_t* _buffer; // zero-terminated contents, never null
      bool _uses_heap; // true when _buffer came from an xpath_allocator
      size_t _length_heap; // cached length, meaningful only when _uses_heap is true
      // Copies length characters of string into allocator memory and zero-terminates the copy;
      // returns 0 on allocation failure.
      static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
      {
          char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
          if (!result) return 0;
          memcpy(result, string, length * sizeof(char_t));
          result[length] = 0;
          return result;
      }
      // Raw constructor used by the factory functions below.
      xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
      {
      }
  public:
      // Wraps a constant zero-terminated string without copying it.
      static xpath_string from_const(const char_t* str)
      {
          return xpath_string(str, false, 0);
      }
      // Wraps an already allocated, zero-terminated buffer [begin, end] where *end is the terminator.
      static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
      {
          assert(begin <= end && *end == 0);
          return xpath_string(begin, true, static_cast<size_t>(end - begin));
      }
      // Copies [begin, end) into allocator memory; yields an empty string for an empty
      // range or when the allocation fails.
      static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
      {
          assert(begin <= end);
          if (begin == end)
              return xpath_string();
          size_t length = static_cast<size_t>(end - begin);
          const char_t* data = duplicate_string(begin, length, alloc);
          return data ? xpath_string(data, true, length) : xpath_string();
      }
      // Constructs an empty constant string.
      xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
      {
      }
      // Appends o to this string. If the allocation fails the string is left unchanged.
      void append(const xpath_string& o, xpath_allocator* alloc)
      {
          // skip empty sources
          if (!*o._buffer) return;
          // fast append for constant empty target and constant source
          if (!*_buffer && !_uses_heap && !o._uses_heap)
          {
              _buffer = o._buffer;
          }
          else
          {
              // need to make heap copy
              size_t target_length = length();
              size_t source_length = o.length();
              size_t result_length = target_length + source_length;
              // allocate new buffer
              // (a heap buffer is grown through reallocate; a constant buffer is passed as null so a fresh buffer is allocated)
              char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
              if (!result) return;
              // append first string to the new buffer in case there was no reallocation
              if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
              // append second string to the new buffer
              memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
              result[result_length] = 0;
              // finalize
              _buffer = result;
              _uses_heap = true;
              _length_heap = result_length;
          }
      }
      // Returns the zero-terminated contents.
      const char_t* c_str() const
      {
          return _buffer;
      }
      // Returns the length in characters: cached for heap strings, measured otherwise.
      size_t length() const
      {
          return _uses_heap ? _length_heap : strlength(_buffer);
      }
      // Returns a writable buffer, first copying a constant string into allocator memory;
      // returns 0 if that copy cannot be allocated.
      char_t* data(xpath_allocator* alloc)
      {
          // make private heap copy
          if (!_uses_heap)
          {
              size_t length_ = strlength(_buffer);
              const char_t* data_ = duplicate_string(_buffer, length_, alloc);
              if (!data_) return 0;
              _buffer = data_;
              _uses_heap = true;
              _length_heap = length_;
          }
          return const_cast<char_t*>(_buffer);
      }
      // True when the string has no characters.
      bool empty() const
      {
          return *_buffer == 0;
      }
      // Content comparison through strequal.
      bool operator==(const xpath_string& o) const
      {
          return strequal(_buffer, o._buffer);
      }
      bool operator!=(const xpath_string& o) const
      {
          return !strequal(_buffer, o._buffer);
      }
      // True when the buffer came from an xpath_allocator.
      bool uses_heap() const
      {
          return _uses_heap;
      }
  };
  6404. PUGI__NS_END
  6405. PUGI__NS_BEGIN
  6406. PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
  6407. {
  6408. while (*pattern && *string == *pattern)
  6409. {
  6410. string++;
  6411. pattern++;
  6412. }
  6413. return *pattern == 0;
  6414. }
  6415. PUGI__FN const char_t* find_char(const char_t* s, char_t c)
  6416. {
  6417. #ifdef PUGIXML_WCHAR_MODE
  6418. return wcschr(s, c);
  6419. #else
  6420. return strchr(s, c);
  6421. #endif
  6422. }
  6423. PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
  6424. {
  6425. #ifdef PUGIXML_WCHAR_MODE
  6426. // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
  6427. return (*p == 0) ? s : wcsstr(s, p);
  6428. #else
  6429. return strstr(s, p);
  6430. #endif
  6431. }
  6432. // Converts symbol to lower case, if it is an ASCII one
  6433. PUGI__FN char_t tolower_ascii(char_t ch)
  6434. {
  6435. return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
  6436. }
  // Computes the string value of an XPath node:
  //  - attribute: the attribute value;
  //  - pcdata/cdata/comment/pi node: the node value;
  //  - document/element node: the node's own value (if any) followed by the values of all
  //    pcdata/cdata descendants in traversal order, concatenated using alloc;
  //  - anything else: an empty string.
  PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
  {
      if (na.attribute())
          return xpath_string::from_const(na.attribute().value());
      else
      {
          xml_node n = na.node();
          switch (n.type())
          {
          case node_pcdata:
          case node_cdata:
          case node_comment:
          case node_pi:
              return xpath_string::from_const(n.value());
          case node_document:
          case node_element:
          {
              xpath_string result;
              // element nodes can have value if parse_embed_pcdata was used
              if (n.value()[0])
                  result.append(xpath_string::from_const(n.value()), alloc);
              // depth-first walk over the subtree of n without recursion
              xml_node cur = n.first_child();
              while (cur && cur != n)
              {
                  if (cur.type() == node_pcdata || cur.type() == node_cdata)
                      result.append(xpath_string::from_const(cur.value()), alloc);
                  if (cur.first_child())
                      cur = cur.first_child();
                  else if (cur.next_sibling())
                      cur = cur.next_sibling();
                  else
                  {
                      // climb until an ancestor with a next sibling is found or we are back at n
                      while (!cur.next_sibling() && cur != n)
                          cur = cur.parent();
                      if (cur != n) cur = cur.next_sibling();
                  }
              }
              return result;
          }
          default:
              return xpath_string();
          }
      }
  }
  6481. PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
  6482. {
  6483. assert(ln->parent == rn->parent);
  6484. // there is no common ancestor (the shared parent is null), nodes are from different documents
  6485. if (!ln->parent) return ln < rn;
  6486. // determine sibling order
  6487. xml_node_struct* ls = ln;
  6488. xml_node_struct* rs = rn;
  6489. while (ls && rs)
  6490. {
  6491. if (ls == rn) return true;
  6492. if (rs == ln) return false;
  6493. ls = ls->next_sibling;
  6494. rs = rs->next_sibling;
  6495. }
  6496. // if rn sibling chain ended ln must be before rn
  6497. return !rs;
  6498. }
  // Returns true when ln precedes rn in document order.
  // Both nodes are lifted to ancestors that share a parent, and the order of those
  // siblings decides; an ancestor counts as being before its descendants.
  PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
  {
      // find common ancestor at the same depth, if any
      xml_node_struct* lp = ln;
      xml_node_struct* rp = rn;
      while (lp && rp && lp->parent != rp->parent)
      {
          lp = lp->parent;
          rp = rp->parent;
      }
      // parents are the same!
      if (lp && rp) return node_is_before_sibling(lp, rp);
      // nodes are at different depths, need to normalize heights
      // (the cursor that ran out first belongs to the shallower node)
      bool left_higher = !lp;
      // lift the deeper node by the remaining depth difference
      while (lp)
      {
          lp = lp->parent;
          ln = ln->parent;
      }
      while (rp)
      {
          rp = rp->parent;
          rn = rn->parent;
      }
      // one node is the ancestor of the other
      if (ln == rn) return left_higher;
      // find common ancestor... again
      while (ln->parent != rn->parent)
      {
          ln = ln->parent;
          rn = rn->parent;
      }
      return node_is_before_sibling(ln, rn);
  }
  6533. PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
  6534. {
  6535. while (node && node != parent) node = node->parent;
  6536. return parent && node == parent;
  6537. }
  6538. PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
  6539. {
  6540. xml_node_struct* node = xnode.node().internal_object();
  6541. if (node)
  6542. {
  6543. if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
  6544. {
  6545. if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
  6546. if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
  6547. }
  6548. return 0;
  6549. }
  6550. xml_attribute_struct* attr = xnode.attribute().internal_object();
  6551. if (attr)
  6552. {
  6553. if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
  6554. {
  6555. if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
  6556. if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
  6557. }
  6558. return 0;
  6559. }
  6560. return 0;
  6561. }
  6562. struct document_order_comparator
  6563. {
  6564. bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
  6565. {
  6566. // optimized document order based check
  6567. const void* lo = document_buffer_order(lhs);
  6568. const void* ro = document_buffer_order(rhs);
  6569. if (lo && ro) return lo < ro;
  6570. // slow comparison
  6571. xml_node ln = lhs.node(), rn = rhs.node();
  6572. // compare attributes
  6573. if (lhs.attribute() && rhs.attribute())
  6574. {
  6575. // shared parent
  6576. if (lhs.parent() == rhs.parent())
  6577. {
  6578. // determine sibling order
  6579. for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
  6580. if (a == rhs.attribute())
  6581. return true;
  6582. return false;
  6583. }
  6584. // compare attribute parents
  6585. ln = lhs.parent();
  6586. rn = rhs.parent();
  6587. }
  6588. else if (lhs.attribute())
  6589. {
  6590. // attributes go after the parent element
  6591. if (lhs.parent() == rhs.node()) return false;
  6592. ln = lhs.parent();
  6593. }
  6594. else if (rhs.attribute())
  6595. {
  6596. // attributes go after the parent element
  6597. if (rhs.parent() == lhs.node()) return true;
  6598. rn = rhs.parent();
  6599. }
  6600. if (ln == rn) return false;
  6601. if (!ln || !rn) return ln < rn;
  6602. return node_is_before(ln.internal_object(), rn.internal_object());
  6603. }
  6604. };
  // Produces a NaN value as a double.
  PUGI__FN double gen_nan()
  {
  #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
      // float has the IEEE 754 single precision parameters: build the NaN from its bit pattern
      PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
      typedef uint32_t UI; // BCC5 workaround
      union { float f; UI i; } u;
      u.i = 0x7fc00000; // sign 0, exponent bits all ones, top mantissa bit set
      return double(u.f);
  #else
      // fallback
      // NOTE(review): volatile presumably keeps the compiler from folding 0/0 at compile time - confirm
      const volatile double zero = 0.0;
      return zero / zero;
  #endif
  }
  6619. PUGI__FN bool is_nan(double value)
  6620. {
  6621. #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
  6622. return !!_isnan(value);
  6623. #elif defined(fpclassify) && defined(FP_NAN)
  6624. return fpclassify(value) == FP_NAN;
  6625. #else
  6626. // fallback
  6627. const volatile double v = value;
  6628. return v != v;
  6629. #endif
  6630. }
  6631. PUGI__FN const char_t* convert_number_to_string_special(double value)
  6632. {
  6633. #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
  6634. if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
  6635. if (_isnan(value)) return PUGIXML_TEXT("NaN");
  6636. return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
  6637. #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
  6638. switch (fpclassify(value))
  6639. {
  6640. case FP_NAN:
  6641. return PUGIXML_TEXT("NaN");
  6642. case FP_INFINITE:
  6643. return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
  6644. case FP_ZERO:
  6645. return PUGIXML_TEXT("0");
  6646. default:
  6647. return 0;
  6648. }
  6649. #else
  6650. // fallback
  6651. const volatile double v = value;
  6652. if (v == 0) return PUGIXML_TEXT("0");
  6653. if (v != v) return PUGIXML_TEXT("NaN");
  6654. if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
  6655. return 0;
  6656. #endif
  6657. }
  6658. PUGI__FN bool convert_number_to_boolean(double value)
  6659. {
  6660. return (value != 0 && !is_nan(value));
  6661. }
  6662. PUGI__FN void truncate_zeros(char* begin, char* end)
  6663. {
  6664. while (begin != end && end[-1] == '0') end--;
  6665. *end = 0;
  6666. }
  // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
  // Outputs: *out_mantissa points into buffer at a zero-terminated digit string with
  // trailing zeros removed; *out_exponent is the decimal exponent for that 0.xxxxx form.
  #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
  // Variant built on the CRT's _ecvt_s.
  PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
  {
      // get base values
      int sign, exponent;
      _ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign);
      // truncate redundant zeros
      truncate_zeros(buffer, buffer + strlen(buffer));
      // fill results
      *out_mantissa = buffer;
      *out_exponent = exponent;
  }
  #else
  // Portable variant: formats the value in scientific notation and rewrites the text in place.
  PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
  {
      // get a scientific notation value with IEEE DBL_DIG decimals
      PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value);
      // get the exponent (possibly negative)
      char* exponent_string = strchr(buffer, 'e');
      assert(exponent_string);
      int exponent = atoi(exponent_string + 1);
      // extract mantissa string: skip sign
      char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
      // expected shape is d.ddd with a non-zero leading digit (zero is not expected here)
      assert(mantissa[0] != '0' && mantissa[1] == '.');
      // divide mantissa by 10 to eliminate integer part
      // (the leading digit overwrites the '.', so the digits become contiguous starting one character later)
      mantissa[1] = mantissa[0];
      mantissa++;
      exponent++;
      // remove extra mantissa digits and zero-terminate mantissa
      truncate_zeros(mantissa, exponent_string);
      // fill results
      *out_mantissa = mantissa;
      *out_exponent = exponent;
  }
  #endif
  // Formats value as a plain decimal string (no exponent notation) in memory obtained from alloc.
  // Special values are answered by convert_number_to_string_special; an allocation
  // failure yields an empty string.
  PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
  {
      // try special number conversion
      const char_t* special = convert_number_to_string_special(value);
      if (special) return xpath_string::from_const(special);
      // get mantissa + exponent form
      char mantissa_buffer[32];
      char* mantissa;
      int exponent;
      convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent);
      // allocate a buffer of suitable length for the number
      // (digits + |exponent| padding zeros + room for sign, leading '0', '.' and the terminator)
      size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
      char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
      if (!result) return xpath_string();
      // make the number!
      char_t* s = result;
      // sign
      if (value < 0) *s++ = '-';
      // integer part
      if (exponent <= 0)
      {
          *s++ = '0';
      }
      else
      {
          // emit exponent digits, padding with '0' once the mantissa digits run out
          while (exponent > 0)
          {
              assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
              *s++ = *mantissa ? *mantissa++ : '0';
              exponent--;
          }
      }
      // fractional part
      if (*mantissa)
      {
          // decimal point
          *s++ = '.';
          // extra zeroes from negative exponent
          while (exponent < 0)
          {
              *s++ = '0';
              exponent++;
          }
          // extra mantissa digits
          while (*mantissa)
          {
              assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
              *s++ = *mantissa++;
          }
      }
      // zero-terminate
      assert(s < result + result_size);
      *s = 0;
      return xpath_string::from_heap_preallocated(result, s);
  }
  6758. PUGI__FN bool check_string_to_number_format(const char_t* string)
  6759. {
  6760. // parse leading whitespace
  6761. while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
  6762. // parse sign
  6763. if (*string == '-') ++string;
  6764. if (!*string) return false;
  6765. // if there is no integer part, there should be a decimal part with at least one digit
  6766. if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
  6767. // parse integer part
  6768. while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
  6769. // parse decimal part
  6770. if (*string == '.')
  6771. {
  6772. ++string;
  6773. while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
  6774. }
  6775. // parse trailing whitespace
  6776. while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
  6777. return *string == 0;
  6778. }
  6779. PUGI__FN double convert_string_to_number(const char_t* string)
  6780. {
  6781. // check string format
  6782. if (!check_string_to_number_format(string)) return gen_nan();
  6783. // parse string
  6784. #ifdef PUGIXML_WCHAR_MODE
  6785. return wcstod(string, 0);
  6786. #else
  6787. return strtod(string, 0);
  6788. #endif
  6789. }
  6790. PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
  6791. {
  6792. size_t length = static_cast<size_t>(end - begin);
  6793. char_t* scratch = buffer;
  6794. if (length >= sizeof(buffer) / sizeof(buffer[0]))
  6795. {
  6796. // need to make dummy on-heap copy
  6797. scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  6798. if (!scratch) return false;
  6799. }
  6800. // copy string to zero-terminated buffer and perform conversion
  6801. memcpy(scratch, begin, length * sizeof(char_t));
  6802. scratch[length] = 0;
  6803. *out_result = convert_string_to_number(scratch);
  6804. // free dummy buffer
  6805. if (scratch != buffer) xml_memory::deallocate(scratch);
  6806. return true;
  6807. }
  6808. PUGI__FN double round_nearest(double value)
  6809. {
  6810. return floor(value + 0.5);
  6811. }
  6812. PUGI__FN double round_nearest_nzero(double value)
  6813. {
  6814. // same as round_nearest, but returns -0 for [-0.5, -0]
  6815. // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
  6816. return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
  6817. }
  6818. PUGI__FN const char_t* qualified_name(const xpath_node& node)
  6819. {
  6820. return node.attribute() ? node.attribute().name() : node.node().name();
  6821. }
  6822. PUGI__FN const char_t* local_name(const xpath_node& node)
  6823. {
  6824. const char_t* name = qualified_name(node);
  6825. const char_t* p = find_char(name, ':');
  6826. return p ? p + 1 : name;
  6827. }
  6828. struct namespace_uri_predicate
  6829. {
  6830. const char_t* prefix;
  6831. size_t prefix_length;
  6832. namespace_uri_predicate(const char_t* name)
  6833. {
  6834. const char_t* pos = find_char(name, ':');
  6835. prefix = pos ? name : 0;
  6836. prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
  6837. }
  6838. bool operator()(xml_attribute a) const
  6839. {
  6840. const char_t* name = a.name();
  6841. if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
  6842. return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
  6843. }
  6844. };
  6845. PUGI__FN const char_t* namespace_uri(xml_node node)
  6846. {
  6847. namespace_uri_predicate pred = node.name();
  6848. xml_node p = node;
  6849. while (p)
  6850. {
  6851. xml_attribute a = p.find_attribute(pred);
  6852. if (a) return a.value();
  6853. p = p.parent();
  6854. }
  6855. return PUGIXML_TEXT("");
  6856. }
  6857. PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
  6858. {
  6859. namespace_uri_predicate pred = attr.name();
  6860. // Default namespace does not apply to attributes
  6861. if (!pred.prefix) return PUGIXML_TEXT("");
  6862. xml_node p = parent;
  6863. while (p)
  6864. {
  6865. xml_attribute a = p.find_attribute(pred);
  6866. if (a) return a.value();
  6867. p = p.parent();
  6868. }
  6869. return PUGIXML_TEXT("");
  6870. }
  6871. PUGI__FN const char_t* namespace_uri(const xpath_node& node)
  6872. {
  6873. return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
  6874. }
  6875. PUGI__FN char_t* normalize_space(char_t* buffer)
  6876. {
  6877. char_t* write = buffer;
  6878. for (char_t* it = buffer; *it; )
  6879. {
  6880. char_t ch = *it++;
  6881. if (PUGI__IS_CHARTYPE(ch, ct_space))
  6882. {
  6883. // replace whitespace sequence with single space
  6884. while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
  6885. // avoid leading spaces
  6886. if (write != buffer) *write++ = ' ';
  6887. }
  6888. else *write++ = ch;
  6889. }
  6890. // remove trailing space
  6891. if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
  6892. // zero-terminate
  6893. *write = 0;
  6894. return write;
  6895. }
  6896. PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
  6897. {
  6898. char_t* write = buffer;
  6899. while (*buffer)
  6900. {
  6901. PUGI__DMC_VOLATILE char_t ch = *buffer++;
  6902. const char_t* pos = find_char(from, ch);
  6903. if (!pos)
  6904. *write++ = ch; // do not process
  6905. else if (static_cast<size_t>(pos - from) < to_length)
  6906. *write++ = to[pos - from]; // replace
  6907. }
  6908. // zero-terminate
  6909. *write = 0;
  6910. return write;
  6911. }
  6912. PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
  6913. {
  6914. unsigned char table[128] = {0};
  6915. while (*from)
  6916. {
  6917. unsigned int fc = static_cast<unsigned int>(*from);
  6918. unsigned int tc = static_cast<unsigned int>(*to);
  6919. if (fc >= 128 || tc >= 128)
  6920. return 0;
  6921. // code=128 means "skip character"
  6922. if (!table[fc])
  6923. table[fc] = static_cast<unsigned char>(tc ? tc : 128);
  6924. from++;
  6925. if (tc) to++;
  6926. }
  6927. for (int i = 0; i < 128; ++i)
  6928. if (!table[i])
  6929. table[i] = static_cast<unsigned char>(i);
  6930. void* result = alloc->allocate(sizeof(table));
  6931. if (!result) return 0;
  6932. memcpy(result, table, sizeof(table));
  6933. return static_cast<unsigned char*>(result);
  6934. }
  6935. PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
  6936. {
  6937. char_t* write = buffer;
  6938. while (*buffer)
  6939. {
  6940. char_t ch = *buffer++;
  6941. unsigned int index = static_cast<unsigned int>(ch);
  6942. if (index < 128)
  6943. {
  6944. unsigned char code = table[index];
  6945. // code=128 means "skip character" (table size is 128 so 128 can be a special value)
  6946. // this code skips these characters without extra branches
  6947. *write = static_cast<char_t>(code);
  6948. write += 1 - (code >> 7);
  6949. }
  6950. else
  6951. {
  6952. *write++ = ch;
  6953. }
  6954. }
  6955. // zero-terminate
  6956. *write = 0;
  6957. return write;
  6958. }
  6959. inline bool is_xpath_attribute(const char_t* name)
  6960. {
  6961. return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
  6962. }
  // Variable of type xpath_type_boolean; the value defaults to false.
  struct xpath_variable_boolean: xpath_variable
  {
      xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
      {
      }

      bool value;
      // first character of the variable name; new_xpath_variable allocates extra space
      // after the object and copies the full name here, so this member must stay last
      char_t name[1];
  };
  // Variable of type xpath_type_number; the value defaults to 0.
  struct xpath_variable_number: xpath_variable
  {
      xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
      {
      }

      double value;
      // first character of the variable name; new_xpath_variable allocates extra space
      // after the object and copies the full name here, so this member must stay last
      char_t name[1];
  };
  // Variable of type xpath_type_string; the value starts out as a null pointer.
  struct xpath_variable_string: xpath_variable
  {
      xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
      {
      }

      // releases the string through xml_memory::deallocate if one is set
      ~xpath_variable_string()
      {
          if (value) xml_memory::deallocate(value);
      }

      char_t* value;
      // first character of the variable name; new_xpath_variable allocates extra space
      // after the object and copies the full name here, so this member must stay last
      char_t name[1];
  };
  // Variable of type xpath_type_node_set; the value is a default-constructed xpath_node_set.
  struct xpath_variable_node_set: xpath_variable
  {
      xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
      {
      }

      xpath_node_set value;
      // first character of the variable name; new_xpath_variable allocates extra space
      // after the object and copies the full name here, so this member must stay last
      char_t name[1];
  };
  // Shared default-constructed node set.
  // NOTE(review): presumably returned by reference where a variable holds no node set - confirm against the users of this object.
  static const xpath_node_set dummy_node_set;
  7000. PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str)
  7001. {
  7002. // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
  7003. unsigned int result = 0;
  7004. while (*str)
  7005. {
  7006. result += static_cast<unsigned int>(*str++);
  7007. result += result << 10;
  7008. result ^= result >> 6;
  7009. }
  7010. result += result << 3;
  7011. result ^= result >> 11;
  7012. result += result << 15;
  7013. return result;
  7014. }
  7015. template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
  7016. {
  7017. size_t length = strlength(name);
  7018. if (length == 0) return 0; // empty variable names are invalid
  7019. // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
  7020. void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
  7021. if (!memory) return 0;
  7022. T* result = new (memory) T();
  7023. memcpy(result->name, name, (length + 1) * sizeof(char_t));
  7024. return result;
  7025. }
  7026. PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
  7027. {
  7028. switch (type)
  7029. {
  7030. case xpath_type_node_set:
  7031. return new_xpath_variable<xpath_variable_node_set>(name);
  7032. case xpath_type_number:
  7033. return new_xpath_variable<xpath_variable_number>(name);
  7034. case xpath_type_string:
  7035. return new_xpath_variable<xpath_variable_string>(name);
  7036. case xpath_type_boolean:
  7037. return new_xpath_variable<xpath_variable_boolean>(name);
  7038. default:
  7039. return 0;
  7040. }
  7041. }
  7042. template <typename T> PUGI__FN void delete_xpath_variable(T* var)
  7043. {
  7044. var->~T();
  7045. xml_memory::deallocate(var);
  7046. }
  7047. PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
  7048. {
  7049. switch (type)
  7050. {
  7051. case xpath_type_node_set:
  7052. delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
  7053. break;
  7054. case xpath_type_number:
  7055. delete_xpath_variable(static_cast<xpath_variable_number*>(var));
  7056. break;
  7057. case xpath_type_string:
  7058. delete_xpath_variable(static_cast<xpath_variable_string*>(var));
  7059. break;
  7060. case xpath_type_boolean:
  7061. delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
  7062. break;
  7063. default:
  7064. assert(false && "Invalid variable type"); // unreachable
  7065. }
  7066. }
  7067. PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
  7068. {
  7069. switch (rhs->type())
  7070. {
  7071. case xpath_type_node_set:
  7072. return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
  7073. case xpath_type_number:
  7074. return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
  7075. case xpath_type_string:
  7076. return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
  7077. case xpath_type_boolean:
  7078. return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
  7079. default:
  7080. assert(false && "Invalid variable type"); // unreachable
  7081. return false;
  7082. }
  7083. }
  7084. PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
  7085. {
  7086. size_t length = static_cast<size_t>(end - begin);
  7087. char_t* scratch = buffer;
  7088. if (length >= sizeof(buffer) / sizeof(buffer[0]))
  7089. {
  7090. // need to make dummy on-heap copy
  7091. scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
  7092. if (!scratch) return false;
  7093. }
  7094. // copy string to zero-terminated buffer and perform lookup
  7095. memcpy(scratch, begin, length * sizeof(char_t));
  7096. scratch[length] = 0;
  7097. *out_result = set->get(scratch);
  7098. // free dummy buffer
  7099. if (scratch != buffer) xml_memory::deallocate(scratch);
  7100. return true;
  7101. }
  7102. PUGI__NS_END
  7103. // Internal node set class
  7104. PUGI__NS_BEGIN
  7105. PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
  7106. {
  7107. if (end - begin < 2)
  7108. return xpath_node_set::type_sorted;
  7109. document_order_comparator cmp;
  7110. bool first = cmp(begin[0], begin[1]);
  7111. for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
  7112. if (cmp(it[0], it[1]) != first)
  7113. return xpath_node_set::type_unsorted;
  7114. return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
  7115. }
  7116. PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
  7117. {
  7118. xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
  7119. if (type == xpath_node_set::type_unsorted)
  7120. {
  7121. xpath_node_set::type_t sorted = xpath_get_order(begin, end);
  7122. if (sorted == xpath_node_set::type_unsorted)
  7123. {
  7124. sort(begin, end, document_order_comparator());
  7125. type = xpath_node_set::type_sorted;
  7126. }
  7127. else
  7128. type = sorted;
  7129. }
  7130. if (type != order) reverse(begin, end);
  7131. return order;
  7132. }
  7133. PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
  7134. {
  7135. if (begin == end) return xpath_node();
  7136. switch (type)
  7137. {
  7138. case xpath_node_set::type_sorted:
  7139. return *begin;
  7140. case xpath_node_set::type_sorted_reverse:
  7141. return *(end - 1);
  7142. case xpath_node_set::type_unsorted:
  7143. return *min_element(begin, end, document_order_comparator());
  7144. default:
  7145. assert(false && "Invalid node set type"); // unreachable
  7146. return xpath_node();
  7147. }
  7148. }
  7149. class xpath_node_set_raw
  7150. {
  7151. xpath_node_set::type_t _type;
  7152. xpath_node* _begin;
  7153. xpath_node* _end;
  7154. xpath_node* _eos;
  7155. public:
  7156. xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
  7157. {
  7158. }
  7159. xpath_node* begin() const
  7160. {
  7161. return _begin;
  7162. }
  7163. xpath_node* end() const
  7164. {
  7165. return _end;
  7166. }
  7167. bool empty() const
  7168. {
  7169. return _begin == _end;
  7170. }
  7171. size_t size() const
  7172. {
  7173. return static_cast<size_t>(_end - _begin);
  7174. }
  7175. xpath_node first() const
  7176. {
  7177. return xpath_first(_begin, _end, _type);
  7178. }
  7179. void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
  7180. void push_back(const xpath_node& node, xpath_allocator* alloc)
  7181. {
  7182. if (_end != _eos)
  7183. *_end++ = node;
  7184. else
  7185. push_back_grow(node, alloc);
  7186. }
  7187. void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
  7188. {
  7189. if (begin_ == end_) return;
  7190. size_t size_ = static_cast<size_t>(_end - _begin);
  7191. size_t capacity = static_cast<size_t>(_eos - _begin);
  7192. size_t count = static_cast<size_t>(end_ - begin_);
  7193. if (size_ + count > capacity)
  7194. {
  7195. // reallocate the old array or allocate a new one
  7196. xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
  7197. if (!data) return;
  7198. // finalize
  7199. _begin = data;
  7200. _end = data + size_;
  7201. _eos = data + size_ + count;
  7202. }
  7203. memcpy(_end, begin_, count * sizeof(xpath_node));
  7204. _end += count;
  7205. }
  7206. void sort_do()
  7207. {
  7208. _type = xpath_sort(_begin, _end, _type, false);
  7209. }
  7210. void truncate(xpath_node* pos)
  7211. {
  7212. assert(_begin <= pos && pos <= _end);
  7213. _end = pos;
  7214. }
  7215. void remove_duplicates(xpath_allocator* alloc)
  7216. {
  7217. if (_type == xpath_node_set::type_unsorted && _end - _begin > 2)
  7218. {
  7219. xpath_allocator_capture cr(alloc);
  7220. size_t size_ = static_cast<size_t>(_end - _begin);
  7221. size_t hash_size = 1;
  7222. while (hash_size < size_ + size_ / 2) hash_size *= 2;
  7223. const void** hash_data = static_cast<const void**>(alloc->allocate(hash_size * sizeof(void**)));
  7224. if (!hash_data) return;
  7225. memset(hash_data, 0, hash_size * sizeof(const void**));
  7226. xpath_node* write = _begin;
  7227. for (xpath_node* it = _begin; it != _end; ++it)
  7228. {
  7229. const void* attr = it->attribute().internal_object();
  7230. const void* node = it->node().internal_object();
  7231. const void* key = attr ? attr : node;
  7232. if (key && hash_insert(hash_data, hash_size, key))
  7233. {
  7234. *write++ = *it;
  7235. }
  7236. }
  7237. _end = write;
  7238. }
  7239. else
  7240. {
  7241. _end = unique(_begin, _end);
  7242. }
  7243. }
  7244. xpath_node_set::type_t type() const
  7245. {
  7246. return _type;
  7247. }
  7248. void set_type(xpath_node_set::type_t value)
  7249. {
  7250. _type = value;
  7251. }
  7252. };
  7253. PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
  7254. {
  7255. size_t capacity = static_cast<size_t>(_eos - _begin);
  7256. // get new capacity (1.5x rule)
  7257. size_t new_capacity = capacity + capacity / 2 + 1;
  7258. // reallocate the old array or allocate a new one
  7259. xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
  7260. if (!data) return;
  7261. // finalize
  7262. _begin = data;
  7263. _end = data + capacity;
  7264. _eos = data + new_capacity;
  7265. // push
  7266. *_end++ = node;
  7267. }
  7268. PUGI__NS_END
  7269. PUGI__NS_BEGIN
  // Evaluation context handed to expression evaluation: the context node together with
  // its position and the context size. The predicate helpers pass 1-based positions.
  struct xpath_context
  {
      xpath_node n;
      size_t position, size;

      xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
      {
      }
  };
  // Token kinds produced by xpath_lexer::next().
  enum lexeme_t
  {
      lex_none = 0,            // unrecognized or malformed input
      lex_equal,               // =
      lex_not_equal,           // !=
      lex_less,                // <
      lex_greater,             // >
      lex_less_or_equal,       // <=
      lex_greater_or_equal,    // >=
      lex_plus,                // +
      lex_minus,               // -
      lex_multiply,            // *
      lex_union,               // |
      lex_var_ref,             // $name or $prefix:name; contents hold the name without '$'
      lex_open_brace,          // (
      lex_close_brace,         // )
      lex_quoted_string,       // '...' or "..."; contents exclude the quotes
      lex_number,              // digits with optional fraction, or .digits
      lex_slash,               // /
      lex_double_slash,        // //
      lex_open_square_brace,   // [
      lex_close_square_brace,  // ]
      lex_string,              // name token: name, prefix:name or prefix:*
      lex_comma,               // ,
      lex_axis_attribute,      // @
      lex_dot,                 // .
      lex_double_dot,          // ..
      lex_double_colon,        // ::
      lex_eof                  // end of the query string
  };
  7308. struct xpath_lexer_string
  7309. {
  7310. const char_t* begin;
  7311. const char_t* end;
  7312. xpath_lexer_string(): begin(0), end(0)
  7313. {
  7314. }
  7315. bool operator==(const char_t* other) const
  7316. {
  7317. size_t length = static_cast<size_t>(end - begin);
  7318. return strequalrange(other, begin, length);
  7319. }
  7320. };
  7321. class xpath_lexer
  7322. {
  7323. const char_t* _cur;
  7324. const char_t* _cur_lexeme_pos;
  7325. xpath_lexer_string _cur_lexeme_contents;
  7326. lexeme_t _cur_lexeme;
  7327. public:
  7328. explicit xpath_lexer(const char_t* query): _cur(query)
  7329. {
  7330. next();
  7331. }
  7332. const char_t* state() const
  7333. {
  7334. return _cur;
  7335. }
  7336. void next()
  7337. {
  7338. const char_t* cur = _cur;
  7339. while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
  7340. // save lexeme position for error reporting
  7341. _cur_lexeme_pos = cur;
  7342. switch (*cur)
  7343. {
  7344. case 0:
  7345. _cur_lexeme = lex_eof;
  7346. break;
  7347. case '>':
  7348. if (*(cur+1) == '=')
  7349. {
  7350. cur += 2;
  7351. _cur_lexeme = lex_greater_or_equal;
  7352. }
  7353. else
  7354. {
  7355. cur += 1;
  7356. _cur_lexeme = lex_greater;
  7357. }
  7358. break;
  7359. case '<':
  7360. if (*(cur+1) == '=')
  7361. {
  7362. cur += 2;
  7363. _cur_lexeme = lex_less_or_equal;
  7364. }
  7365. else
  7366. {
  7367. cur += 1;
  7368. _cur_lexeme = lex_less;
  7369. }
  7370. break;
  7371. case '!':
  7372. if (*(cur+1) == '=')
  7373. {
  7374. cur += 2;
  7375. _cur_lexeme = lex_not_equal;
  7376. }
  7377. else
  7378. {
  7379. _cur_lexeme = lex_none;
  7380. }
  7381. break;
  7382. case '=':
  7383. cur += 1;
  7384. _cur_lexeme = lex_equal;
  7385. break;
  7386. case '+':
  7387. cur += 1;
  7388. _cur_lexeme = lex_plus;
  7389. break;
  7390. case '-':
  7391. cur += 1;
  7392. _cur_lexeme = lex_minus;
  7393. break;
  7394. case '*':
  7395. cur += 1;
  7396. _cur_lexeme = lex_multiply;
  7397. break;
  7398. case '|':
  7399. cur += 1;
  7400. _cur_lexeme = lex_union;
  7401. break;
  7402. case '$':
  7403. cur += 1;
  7404. if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
  7405. {
  7406. _cur_lexeme_contents.begin = cur;
  7407. while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
  7408. if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
  7409. {
  7410. cur++; // :
  7411. while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
  7412. }
  7413. _cur_lexeme_contents.end = cur;
  7414. _cur_lexeme = lex_var_ref;
  7415. }
  7416. else
  7417. {
  7418. _cur_lexeme = lex_none;
  7419. }
  7420. break;
  7421. case '(':
  7422. cur += 1;
  7423. _cur_lexeme = lex_open_brace;
  7424. break;
  7425. case ')':
  7426. cur += 1;
  7427. _cur_lexeme = lex_close_brace;
  7428. break;
  7429. case '[':
  7430. cur += 1;
  7431. _cur_lexeme = lex_open_square_brace;
  7432. break;
  7433. case ']':
  7434. cur += 1;
  7435. _cur_lexeme = lex_close_square_brace;
  7436. break;
  7437. case ',':
  7438. cur += 1;
  7439. _cur_lexeme = lex_comma;
  7440. break;
  7441. case '/':
  7442. if (*(cur+1) == '/')
  7443. {
  7444. cur += 2;
  7445. _cur_lexeme = lex_double_slash;
  7446. }
  7447. else
  7448. {
  7449. cur += 1;
  7450. _cur_lexeme = lex_slash;
  7451. }
  7452. break;
  7453. case '.':
  7454. if (*(cur+1) == '.')
  7455. {
  7456. cur += 2;
  7457. _cur_lexeme = lex_double_dot;
  7458. }
  7459. else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
  7460. {
  7461. _cur_lexeme_contents.begin = cur; // .
  7462. ++cur;
  7463. while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
  7464. _cur_lexeme_contents.end = cur;
  7465. _cur_lexeme = lex_number;
  7466. }
  7467. else
  7468. {
  7469. cur += 1;
  7470. _cur_lexeme = lex_dot;
  7471. }
  7472. break;
  7473. case '@':
  7474. cur += 1;
  7475. _cur_lexeme = lex_axis_attribute;
  7476. break;
  7477. case '"':
  7478. case '\'':
  7479. {
  7480. char_t terminator = *cur;
  7481. ++cur;
  7482. _cur_lexeme_contents.begin = cur;
  7483. while (*cur && *cur != terminator) cur++;
  7484. _cur_lexeme_contents.end = cur;
  7485. if (!*cur)
  7486. _cur_lexeme = lex_none;
  7487. else
  7488. {
  7489. cur += 1;
  7490. _cur_lexeme = lex_quoted_string;
  7491. }
  7492. break;
  7493. }
  7494. case ':':
  7495. if (*(cur+1) == ':')
  7496. {
  7497. cur += 2;
  7498. _cur_lexeme = lex_double_colon;
  7499. }
  7500. else
  7501. {
  7502. _cur_lexeme = lex_none;
  7503. }
  7504. break;
  7505. default:
  7506. if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
  7507. {
  7508. _cur_lexeme_contents.begin = cur;
  7509. while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
  7510. if (*cur == '.')
  7511. {
  7512. cur++;
  7513. while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
  7514. }
  7515. _cur_lexeme_contents.end = cur;
  7516. _cur_lexeme = lex_number;
  7517. }
  7518. else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
  7519. {
  7520. _cur_lexeme_contents.begin = cur;
  7521. while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
  7522. if (cur[0] == ':')
  7523. {
  7524. if (cur[1] == '*') // namespace test ncname:*
  7525. {
  7526. cur += 2; // :*
  7527. }
  7528. else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
  7529. {
  7530. cur++; // :
  7531. while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
  7532. }
  7533. }
  7534. _cur_lexeme_contents.end = cur;
  7535. _cur_lexeme = lex_string;
  7536. }
  7537. else
  7538. {
  7539. _cur_lexeme = lex_none;
  7540. }
  7541. }
  7542. _cur = cur;
  7543. }
  7544. lexeme_t current() const
  7545. {
  7546. return _cur_lexeme;
  7547. }
  7548. const char_t* current_pos() const
  7549. {
  7550. return _cur_lexeme_pos;
  7551. }
  7552. const xpath_lexer_string& contents() const
  7553. {
  7554. assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
  7555. return _cur_lexeme_contents;
  7556. }
  7557. };
  // Kinds of XPath AST nodes. In the comments, left/right/third denote operand subtrees.
  enum ast_type_t
  {
      ast_unknown,
      ast_op_or, // left or right
      ast_op_and, // left and right
      ast_op_equal, // left = right
      ast_op_not_equal, // left != right
      ast_op_less, // left < right
      ast_op_greater, // left > right
      ast_op_less_or_equal, // left <= right
      ast_op_greater_or_equal, // left >= right
      ast_op_add, // left + right
      ast_op_subtract, // left - right
      ast_op_multiply, // left * right
      ast_op_divide, // left / right
      ast_op_mod, // left % right
      ast_op_negate, // -left
      ast_op_union, // left | right
      ast_predicate, // apply predicate to set; next points to next predicate
      ast_filter, // select * from left where right
      ast_string_constant, // string constant
      ast_number_constant, // number constant
      ast_variable, // variable
      ast_func_last, // last()
      ast_func_position, // position()
      ast_func_count, // count(left)
      ast_func_id, // id(left)
      ast_func_local_name_0, // local-name()
      ast_func_local_name_1, // local-name(left)
      ast_func_namespace_uri_0, // namespace-uri()
      ast_func_namespace_uri_1, // namespace-uri(left)
      ast_func_name_0, // name()
      ast_func_name_1, // name(left)
      ast_func_string_0, // string()
      ast_func_string_1, // string(left)
      ast_func_concat, // concat(left, right, siblings)
      ast_func_starts_with, // starts-with(left, right)
      ast_func_contains, // contains(left, right)
      ast_func_substring_before, // substring-before(left, right)
      ast_func_substring_after, // substring-after(left, right)
      ast_func_substring_2, // substring(left, right)
      ast_func_substring_3, // substring(left, right, third)
      ast_func_string_length_0, // string-length()
      ast_func_string_length_1, // string-length(left)
      ast_func_normalize_space_0, // normalize-space()
      ast_func_normalize_space_1, // normalize-space(left)
      ast_func_translate, // translate(left, right, third)
      ast_func_boolean, // boolean(left)
      ast_func_not, // not(left)
      ast_func_true, // true()
      ast_func_false, // false()
      ast_func_lang, // lang(left)
      ast_func_number_0, // number()
      ast_func_number_1, // number(left)
      ast_func_sum, // sum(left)
      ast_func_floor, // floor(left)
      ast_func_ceiling, // ceiling(left)
      ast_func_round, // round(left)
      ast_step, // process set left with step
      ast_step_root, // select root node
      ast_opt_translate_table, // translate(left, right, third) where right/third are constants
      ast_opt_compare_attribute // @name = 'string'
  };
  // Axis of a location step (stored in xpath_ast_node::_axis for ast_step nodes).
  enum axis_t
  {
      axis_ancestor,
      axis_ancestor_or_self,
      axis_attribute,
      axis_child,
      axis_descendant,
      axis_descendant_or_self,
      axis_following,
      axis_following_sibling,
      axis_namespace,
      axis_parent,
      axis_preceding,
      axis_preceding_sibling,
      axis_self
  };
  // Node test of a location step (stored in xpath_ast_node::_test).
  // The attribute overload of step_push handles nodetest_name (exact name match),
  // nodetest_type_node / nodetest_all (any attribute) and nodetest_all_in_namespace
  // (name starts with the stored prefix); every other test rejects attributes there.
  enum nodetest_t
  {
      nodetest_none,
      nodetest_name,
      nodetest_type_node,
      nodetest_type_comment,
      nodetest_type_pi,
      nodetest_type_text,
      nodetest_pi,
      nodetest_all,
      nodetest_all_in_namespace
  };
  // Predicate classification (stored in xpath_ast_node::_test for predicate/filter nodes).
  // apply_predicate evaluates predicate_constant and predicate_constant_one only once per
  // node set (apply_predicate_number_const); all other kinds are evaluated per node.
  // NOTE(review): predicate_posinv presumably marks position-invariant predicates - confirm
  // against the optimizer, which is outside this part of the file.
  enum predicate_t
  {
      predicate_default,
      predicate_posinv,
      predicate_constant,
      predicate_constant_one
  };
  // How much of a node set the caller of eval_node_set needs.
  // Per eval_once: with nodeset_eval_all evaluation never stops early; with nodeset_eval_any
  // it may stop after the first match for any ordering; with nodeset_eval_first it may stop
  // after the first match only when the set is in document order.
  enum nodeset_eval_t
  {
      nodeset_eval_all,
      nodeset_eval_any,
      nodeset_eval_first
  };
  // Maps an axis_t constant to a distinct type whose static member `axis` equals N.
  // NOTE(review): presumably used to select axis-specific code at compile time - confirm against the step evaluation code.
  template <axis_t N> struct axis_to_type
  {
      static const axis_t axis;
  };

  // out-of-class definition of the static member
  template <axis_t N> const axis_t axis_to_type<N>::axis = N;
  7667. class xpath_ast_node
  7668. {
  7669. private:
  7670. // node type
  7671. char _type;
  7672. char _rettype;
  7673. // for ast_step
  7674. char _axis;
  7675. // for ast_step/ast_predicate/ast_filter
  7676. char _test;
  7677. // tree node structure
  7678. xpath_ast_node* _left;
  7679. xpath_ast_node* _right;
  7680. xpath_ast_node* _next;
  7681. union
  7682. {
  7683. // value for ast_string_constant
  7684. const char_t* string;
  7685. // value for ast_number_constant
  7686. double number;
  7687. // variable for ast_variable
  7688. xpath_variable* variable;
  7689. // node test for ast_step (node name/namespace/node type/pi target)
  7690. const char_t* nodetest;
  7691. // table for ast_opt_translate_table
  7692. const unsigned char* table;
  7693. } _data;
  7694. xpath_ast_node(const xpath_ast_node&);
  7695. xpath_ast_node& operator=(const xpath_ast_node&);
  7696. template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
  7697. {
  7698. xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
  7699. if (lt != xpath_type_node_set && rt != xpath_type_node_set)
  7700. {
  7701. if (lt == xpath_type_boolean || rt == xpath_type_boolean)
  7702. return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
  7703. else if (lt == xpath_type_number || rt == xpath_type_number)
  7704. return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
  7705. else if (lt == xpath_type_string || rt == xpath_type_string)
  7706. {
  7707. xpath_allocator_capture cr(stack.result);
  7708. xpath_string ls = lhs->eval_string(c, stack);
  7709. xpath_string rs = rhs->eval_string(c, stack);
  7710. return comp(ls, rs);
  7711. }
  7712. }
  7713. else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
  7714. {
  7715. xpath_allocator_capture cr(stack.result);
  7716. xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
  7717. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7718. for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
  7719. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7720. {
  7721. xpath_allocator_capture cri(stack.result);
  7722. if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
  7723. return true;
  7724. }
  7725. return false;
  7726. }
  7727. else
  7728. {
  7729. if (lt == xpath_type_node_set)
  7730. {
  7731. swap(lhs, rhs);
  7732. swap(lt, rt);
  7733. }
  7734. if (lt == xpath_type_boolean)
  7735. return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
  7736. else if (lt == xpath_type_number)
  7737. {
  7738. xpath_allocator_capture cr(stack.result);
  7739. double l = lhs->eval_number(c, stack);
  7740. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7741. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7742. {
  7743. xpath_allocator_capture cri(stack.result);
  7744. if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
  7745. return true;
  7746. }
  7747. return false;
  7748. }
  7749. else if (lt == xpath_type_string)
  7750. {
  7751. xpath_allocator_capture cr(stack.result);
  7752. xpath_string l = lhs->eval_string(c, stack);
  7753. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7754. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7755. {
  7756. xpath_allocator_capture cri(stack.result);
  7757. if (comp(l, string_value(*ri, stack.result)))
  7758. return true;
  7759. }
  7760. return false;
  7761. }
  7762. }
  7763. assert(false && "Wrong types"); // unreachable
  7764. return false;
  7765. }
  7766. static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
  7767. {
  7768. return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
  7769. }
  7770. template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
  7771. {
  7772. xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
  7773. if (lt != xpath_type_node_set && rt != xpath_type_node_set)
  7774. return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
  7775. else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
  7776. {
  7777. xpath_allocator_capture cr(stack.result);
  7778. xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
  7779. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7780. for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
  7781. {
  7782. xpath_allocator_capture cri(stack.result);
  7783. double l = convert_string_to_number(string_value(*li, stack.result).c_str());
  7784. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7785. {
  7786. xpath_allocator_capture crii(stack.result);
  7787. if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
  7788. return true;
  7789. }
  7790. }
  7791. return false;
  7792. }
  7793. else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
  7794. {
  7795. xpath_allocator_capture cr(stack.result);
  7796. double l = lhs->eval_number(c, stack);
  7797. xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
  7798. for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
  7799. {
  7800. xpath_allocator_capture cri(stack.result);
  7801. if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
  7802. return true;
  7803. }
  7804. return false;
  7805. }
  7806. else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
  7807. {
  7808. xpath_allocator_capture cr(stack.result);
  7809. xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
  7810. double r = rhs->eval_number(c, stack);
  7811. for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
  7812. {
  7813. xpath_allocator_capture cri(stack.result);
  7814. if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
  7815. return true;
  7816. }
  7817. return false;
  7818. }
  7819. else
  7820. {
  7821. assert(false && "Wrong types"); // unreachable
  7822. return false;
  7823. }
  7824. }
  7825. static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
  7826. {
  7827. assert(ns.size() >= first);
  7828. assert(expr->rettype() != xpath_type_number);
  7829. size_t i = 1;
  7830. size_t size = ns.size() - first;
  7831. xpath_node* last = ns.begin() + first;
  7832. // remove_if... or well, sort of
  7833. for (xpath_node* it = last; it != ns.end(); ++it, ++i)
  7834. {
  7835. xpath_context c(*it, i, size);
  7836. if (expr->eval_boolean(c, stack))
  7837. {
  7838. *last++ = *it;
  7839. if (once) break;
  7840. }
  7841. }
  7842. ns.truncate(last);
  7843. }
  7844. static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
  7845. {
  7846. assert(ns.size() >= first);
  7847. assert(expr->rettype() == xpath_type_number);
  7848. size_t i = 1;
  7849. size_t size = ns.size() - first;
  7850. xpath_node* last = ns.begin() + first;
  7851. // remove_if... or well, sort of
  7852. for (xpath_node* it = last; it != ns.end(); ++it, ++i)
  7853. {
  7854. xpath_context c(*it, i, size);
  7855. if (expr->eval_number(c, stack) == static_cast<double>(i))
  7856. {
  7857. *last++ = *it;
  7858. if (once) break;
  7859. }
  7860. }
  7861. ns.truncate(last);
  7862. }
  7863. static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
  7864. {
  7865. assert(ns.size() >= first);
  7866. assert(expr->rettype() == xpath_type_number);
  7867. size_t size = ns.size() - first;
  7868. xpath_node* last = ns.begin() + first;
  7869. xpath_context c(xpath_node(), 1, size);
  7870. double er = expr->eval_number(c, stack);
  7871. if (er >= 1.0 && er <= static_cast<double>(size))
  7872. {
  7873. size_t eri = static_cast<size_t>(er);
  7874. if (er == static_cast<double>(eri))
  7875. {
  7876. xpath_node r = last[eri - 1];
  7877. *last++ = r;
  7878. }
  7879. }
  7880. ns.truncate(last);
  7881. }
  7882. void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
  7883. {
  7884. if (ns.size() == first) return;
  7885. assert(_type == ast_filter || _type == ast_predicate);
  7886. if (_test == predicate_constant || _test == predicate_constant_one)
  7887. apply_predicate_number_const(ns, first, _right, stack);
  7888. else if (_right->rettype() == xpath_type_number)
  7889. apply_predicate_number(ns, first, _right, stack, once);
  7890. else
  7891. apply_predicate_boolean(ns, first, _right, stack, once);
  7892. }
  7893. void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
  7894. {
  7895. if (ns.size() == first) return;
  7896. bool last_once = eval_once(ns.type(), eval);
  7897. for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
  7898. pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
  7899. }
  7900. bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
  7901. {
  7902. assert(a);
  7903. const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
  7904. switch (_test)
  7905. {
  7906. case nodetest_name:
  7907. if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
  7908. {
  7909. ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
  7910. return true;
  7911. }
  7912. break;
  7913. case nodetest_type_node:
  7914. case nodetest_all:
  7915. if (is_xpath_attribute(name))
  7916. {
  7917. ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
  7918. return true;
  7919. }
  7920. break;
  7921. case nodetest_all_in_namespace:
  7922. if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
  7923. {
  7924. ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
  7925. return true;
  7926. }
  7927. break;
  7928. default:
  7929. ;
  7930. }
  7931. return false;
  7932. }
  7933. bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
  7934. {
  7935. assert(n);
  7936. xml_node_type type = PUGI__NODETYPE(n);
  7937. switch (_test)
  7938. {
  7939. case nodetest_name:
  7940. if (type == node_element && n->name && strequal(n->name, _data.nodetest))
  7941. {
  7942. ns.push_back(xml_node(n), alloc);
  7943. return true;
  7944. }
  7945. break;
  7946. case nodetest_type_node:
  7947. ns.push_back(xml_node(n), alloc);
  7948. return true;
  7949. case nodetest_type_comment:
  7950. if (type == node_comment)
  7951. {
  7952. ns.push_back(xml_node(n), alloc);
  7953. return true;
  7954. }
  7955. break;
  7956. case nodetest_type_text:
  7957. if (type == node_pcdata || type == node_cdata)
  7958. {
  7959. ns.push_back(xml_node(n), alloc);
  7960. return true;
  7961. }
  7962. break;
  7963. case nodetest_type_pi:
  7964. if (type == node_pi)
  7965. {
  7966. ns.push_back(xml_node(n), alloc);
  7967. return true;
  7968. }
  7969. break;
  7970. case nodetest_pi:
  7971. if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
  7972. {
  7973. ns.push_back(xml_node(n), alloc);
  7974. return true;
  7975. }
  7976. break;
  7977. case nodetest_all:
  7978. if (type == node_element)
  7979. {
  7980. ns.push_back(xml_node(n), alloc);
  7981. return true;
  7982. }
  7983. break;
  7984. case nodetest_all_in_namespace:
  7985. if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
  7986. {
  7987. ns.push_back(xml_node(n), alloc);
  7988. return true;
  7989. }
  7990. break;
  7991. default:
  7992. assert(false && "Unknown axis"); // unreachable
  7993. }
  7994. return false;
  7995. }
  7996. template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
  7997. {
  7998. const axis_t axis = T::axis;
  7999. switch (axis)
  8000. {
  8001. case axis_attribute:
  8002. {
  8003. for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
  8004. if (step_push(ns, a, n, alloc) & once)
  8005. return;
  8006. break;
  8007. }
  8008. case axis_child:
  8009. {
  8010. for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
  8011. if (step_push(ns, c, alloc) & once)
  8012. return;
  8013. break;
  8014. }
  8015. case axis_descendant:
  8016. case axis_descendant_or_self:
  8017. {
  8018. if (axis == axis_descendant_or_self)
  8019. if (step_push(ns, n, alloc) & once)
  8020. return;
  8021. xml_node_struct* cur = n->first_child;
  8022. while (cur)
  8023. {
  8024. if (step_push(ns, cur, alloc) & once)
  8025. return;
  8026. if (cur->first_child)
  8027. cur = cur->first_child;
  8028. else
  8029. {
  8030. while (!cur->next_sibling)
  8031. {
  8032. cur = cur->parent;
  8033. if (cur == n) return;
  8034. }
  8035. cur = cur->next_sibling;
  8036. }
  8037. }
  8038. break;
  8039. }
  8040. case axis_following_sibling:
  8041. {
  8042. for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
  8043. if (step_push(ns, c, alloc) & once)
  8044. return;
  8045. break;
  8046. }
  8047. case axis_preceding_sibling:
  8048. {
  8049. for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
  8050. if (step_push(ns, c, alloc) & once)
  8051. return;
  8052. break;
  8053. }
  8054. case axis_following:
  8055. {
  8056. xml_node_struct* cur = n;
  8057. // exit from this node so that we don't include descendants
  8058. while (!cur->next_sibling)
  8059. {
  8060. cur = cur->parent;
  8061. if (!cur) return;
  8062. }
  8063. cur = cur->next_sibling;
  8064. while (cur)
  8065. {
  8066. if (step_push(ns, cur, alloc) & once)
  8067. return;
  8068. if (cur->first_child)
  8069. cur = cur->first_child;
  8070. else
  8071. {
  8072. while (!cur->next_sibling)
  8073. {
  8074. cur = cur->parent;
  8075. if (!cur) return;
  8076. }
  8077. cur = cur->next_sibling;
  8078. }
  8079. }
  8080. break;
  8081. }
  8082. case axis_preceding:
  8083. {
  8084. xml_node_struct* cur = n;
  8085. // exit from this node so that we don't include descendants
  8086. while (!cur->prev_sibling_c->next_sibling)
  8087. {
  8088. cur = cur->parent;
  8089. if (!cur) return;
  8090. }
  8091. cur = cur->prev_sibling_c;
  8092. while (cur)
  8093. {
  8094. if (cur->first_child)
  8095. cur = cur->first_child->prev_sibling_c;
  8096. else
  8097. {
  8098. // leaf node, can't be ancestor
  8099. if (step_push(ns, cur, alloc) & once)
  8100. return;
  8101. while (!cur->prev_sibling_c->next_sibling)
  8102. {
  8103. cur = cur->parent;
  8104. if (!cur) return;
  8105. if (!node_is_ancestor(cur, n))
  8106. if (step_push(ns, cur, alloc) & once)
  8107. return;
  8108. }
  8109. cur = cur->prev_sibling_c;
  8110. }
  8111. }
  8112. break;
  8113. }
  8114. case axis_ancestor:
  8115. case axis_ancestor_or_self:
  8116. {
  8117. if (axis == axis_ancestor_or_self)
  8118. if (step_push(ns, n, alloc) & once)
  8119. return;
  8120. xml_node_struct* cur = n->parent;
  8121. while (cur)
  8122. {
  8123. if (step_push(ns, cur, alloc) & once)
  8124. return;
  8125. cur = cur->parent;
  8126. }
  8127. break;
  8128. }
  8129. case axis_self:
  8130. {
  8131. step_push(ns, n, alloc);
  8132. break;
  8133. }
  8134. case axis_parent:
  8135. {
  8136. if (n->parent)
  8137. step_push(ns, n->parent, alloc);
  8138. break;
  8139. }
  8140. default:
  8141. assert(false && "Unimplemented axis"); // unreachable
  8142. }
  8143. }
  8144. template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
  8145. {
  8146. const axis_t axis = T::axis;
  8147. switch (axis)
  8148. {
  8149. case axis_ancestor:
  8150. case axis_ancestor_or_self:
  8151. {
  8152. if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
  8153. if (step_push(ns, a, p, alloc) & once)
  8154. return;
  8155. xml_node_struct* cur = p;
  8156. while (cur)
  8157. {
  8158. if (step_push(ns, cur, alloc) & once)
  8159. return;
  8160. cur = cur->parent;
  8161. }
  8162. break;
  8163. }
  8164. case axis_descendant_or_self:
  8165. case axis_self:
  8166. {
  8167. if (_test == nodetest_type_node) // reject attributes based on principal node type test
  8168. step_push(ns, a, p, alloc);
  8169. break;
  8170. }
  8171. case axis_following:
  8172. {
  8173. xml_node_struct* cur = p;
  8174. while (cur)
  8175. {
  8176. if (cur->first_child)
  8177. cur = cur->first_child;
  8178. else
  8179. {
  8180. while (!cur->next_sibling)
  8181. {
  8182. cur = cur->parent;
  8183. if (!cur) return;
  8184. }
  8185. cur = cur->next_sibling;
  8186. }
  8187. if (step_push(ns, cur, alloc) & once)
  8188. return;
  8189. }
  8190. break;
  8191. }
  8192. case axis_parent:
  8193. {
  8194. step_push(ns, p, alloc);
  8195. break;
  8196. }
  8197. case axis_preceding:
  8198. {
  8199. // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
  8200. step_fill(ns, p, alloc, once, v);
  8201. break;
  8202. }
  8203. default:
  8204. assert(false && "Unimplemented axis"); // unreachable
  8205. }
  8206. }
  8207. template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
  8208. {
  8209. const axis_t axis = T::axis;
  8210. const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
  8211. if (xn.node())
  8212. step_fill(ns, xn.node().internal_object(), alloc, once, v);
  8213. else if (axis_has_attributes && xn.attribute() && xn.parent())
  8214. step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
  8215. }
  8216. template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
  8217. {
  8218. const axis_t axis = T::axis;
  8219. const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
  8220. const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
  8221. bool once =
  8222. (axis == axis_attribute && _test == nodetest_name) ||
  8223. (!_right && eval_once(axis_type, eval)) ||
  8224. // coverity[mixed_enums]
  8225. (_right && !_right->_next && _right->_test == predicate_constant_one);
  8226. xpath_node_set_raw ns;
  8227. ns.set_type(axis_type);
  8228. if (_left)
  8229. {
  8230. xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
  8231. // self axis preserves the original order
  8232. if (axis == axis_self) ns.set_type(s.type());
  8233. for (const xpath_node* it = s.begin(); it != s.end(); ++it)
  8234. {
  8235. size_t size = ns.size();
  8236. // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
  8237. if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
  8238. step_fill(ns, *it, stack.result, once, v);
  8239. if (_right) apply_predicates(ns, size, stack, eval);
  8240. }
  8241. }
  8242. else
  8243. {
  8244. step_fill(ns, c.n, stack.result, once, v);
  8245. if (_right) apply_predicates(ns, 0, stack, eval);
  8246. }
  8247. // child, attribute and self axes always generate unique set of nodes
  8248. // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
  8249. if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
  8250. ns.remove_duplicates(stack.temp);
  8251. return ns;
  8252. }
  8253. public:
  8254. xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
  8255. _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
  8256. {
  8257. assert(type == ast_string_constant);
  8258. _data.string = value;
  8259. }
  8260. xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
  8261. _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
  8262. {
  8263. assert(type == ast_number_constant);
  8264. _data.number = value;
  8265. }
  8266. xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
  8267. _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
  8268. {
  8269. assert(type == ast_variable);
  8270. _data.variable = value;
  8271. }
  8272. xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
  8273. _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
  8274. {
  8275. }
  8276. xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
  8277. _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
  8278. {
  8279. assert(type == ast_step);
  8280. _data.nodetest = contents;
  8281. }
  8282. xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
  8283. _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
  8284. {
  8285. assert(type == ast_filter || type == ast_predicate);
  8286. }
  8287. void set_next(xpath_ast_node* value)
  8288. {
  8289. _next = value;
  8290. }
  8291. void set_right(xpath_ast_node* value)
  8292. {
  8293. _right = value;
  8294. }
  8295. bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
  8296. {
  8297. switch (_type)
  8298. {
  8299. case ast_op_or:
  8300. return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
  8301. case ast_op_and:
  8302. return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
  8303. case ast_op_equal:
  8304. return compare_eq(_left, _right, c, stack, equal_to());
  8305. case ast_op_not_equal:
  8306. return compare_eq(_left, _right, c, stack, not_equal_to());
  8307. case ast_op_less:
  8308. return compare_rel(_left, _right, c, stack, less());
  8309. case ast_op_greater:
  8310. return compare_rel(_right, _left, c, stack, less());
  8311. case ast_op_less_or_equal:
  8312. return compare_rel(_left, _right, c, stack, less_equal());
  8313. case ast_op_greater_or_equal:
  8314. return compare_rel(_right, _left, c, stack, less_equal());
  8315. case ast_func_starts_with:
  8316. {
  8317. xpath_allocator_capture cr(stack.result);
  8318. xpath_string lr = _left->eval_string(c, stack);
  8319. xpath_string rr = _right->eval_string(c, stack);
  8320. return starts_with(lr.c_str(), rr.c_str());
  8321. }
  8322. case ast_func_contains:
  8323. {
  8324. xpath_allocator_capture cr(stack.result);
  8325. xpath_string lr = _left->eval_string(c, stack);
  8326. xpath_string rr = _right->eval_string(c, stack);
  8327. return find_substring(lr.c_str(), rr.c_str()) != 0;
  8328. }
  8329. case ast_func_boolean:
  8330. return _left->eval_boolean(c, stack);
  8331. case ast_func_not:
  8332. return !_left->eval_boolean(c, stack);
  8333. case ast_func_true:
  8334. return true;
  8335. case ast_func_false:
  8336. return false;
  8337. case ast_func_lang:
  8338. {
  8339. if (c.n.attribute()) return false;
  8340. xpath_allocator_capture cr(stack.result);
  8341. xpath_string lang = _left->eval_string(c, stack);
  8342. for (xml_node n = c.n.node(); n; n = n.parent())
  8343. {
  8344. xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
  8345. if (a)
  8346. {
  8347. const char_t* value = a.value();
  8348. // strnicmp / strncasecmp is not portable
  8349. for (const char_t* lit = lang.c_str(); *lit; ++lit)
  8350. {
  8351. if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
  8352. ++value;
  8353. }
  8354. return *value == 0 || *value == '-';
  8355. }
  8356. }
  8357. return false;
  8358. }
  8359. case ast_opt_compare_attribute:
  8360. {
  8361. const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
  8362. xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
  8363. return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
  8364. }
  8365. case ast_variable:
  8366. {
  8367. assert(_rettype == _data.variable->type());
  8368. if (_rettype == xpath_type_boolean)
  8369. return _data.variable->get_boolean();
  8370. // variable needs to be converted to the correct type, this is handled by the fallthrough block below
  8371. break;
  8372. }
  8373. default:
  8374. ;
  8375. }
  8376. // none of the ast types that return the value directly matched, we need to perform type conversion
  8377. switch (_rettype)
  8378. {
  8379. case xpath_type_number:
  8380. return convert_number_to_boolean(eval_number(c, stack));
  8381. case xpath_type_string:
  8382. {
  8383. xpath_allocator_capture cr(stack.result);
  8384. return !eval_string(c, stack).empty();
  8385. }
  8386. case xpath_type_node_set:
  8387. {
  8388. xpath_allocator_capture cr(stack.result);
  8389. return !eval_node_set(c, stack, nodeset_eval_any).empty();
  8390. }
  8391. default:
  8392. assert(false && "Wrong expression for return type boolean"); // unreachable
  8393. return false;
  8394. }
  8395. }
  8396. double eval_number(const xpath_context& c, const xpath_stack& stack)
  8397. {
  8398. switch (_type)
  8399. {
  8400. case ast_op_add:
  8401. return _left->eval_number(c, stack) + _right->eval_number(c, stack);
  8402. case ast_op_subtract:
  8403. return _left->eval_number(c, stack) - _right->eval_number(c, stack);
  8404. case ast_op_multiply:
  8405. return _left->eval_number(c, stack) * _right->eval_number(c, stack);
  8406. case ast_op_divide:
  8407. return _left->eval_number(c, stack) / _right->eval_number(c, stack);
  8408. case ast_op_mod:
  8409. return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
  8410. case ast_op_negate:
  8411. return -_left->eval_number(c, stack);
  8412. case ast_number_constant:
  8413. return _data.number;
  8414. case ast_func_last:
  8415. return static_cast<double>(c.size);
  8416. case ast_func_position:
  8417. return static_cast<double>(c.position);
  8418. case ast_func_count:
  8419. {
  8420. xpath_allocator_capture cr(stack.result);
  8421. return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
  8422. }
  8423. case ast_func_string_length_0:
  8424. {
  8425. xpath_allocator_capture cr(stack.result);
  8426. return static_cast<double>(string_value(c.n, stack.result).length());
  8427. }
  8428. case ast_func_string_length_1:
  8429. {
  8430. xpath_allocator_capture cr(stack.result);
  8431. return static_cast<double>(_left->eval_string(c, stack).length());
  8432. }
  8433. case ast_func_number_0:
  8434. {
  8435. xpath_allocator_capture cr(stack.result);
  8436. return convert_string_to_number(string_value(c.n, stack.result).c_str());
  8437. }
  8438. case ast_func_number_1:
  8439. return _left->eval_number(c, stack);
  8440. case ast_func_sum:
  8441. {
  8442. xpath_allocator_capture cr(stack.result);
  8443. double r = 0;
  8444. xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
  8445. for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
  8446. {
  8447. xpath_allocator_capture cri(stack.result);
  8448. r += convert_string_to_number(string_value(*it, stack.result).c_str());
  8449. }
  8450. return r;
  8451. }
  8452. case ast_func_floor:
  8453. {
  8454. double r = _left->eval_number(c, stack);
  8455. return r == r ? floor(r) : r;
  8456. }
  8457. case ast_func_ceiling:
  8458. {
  8459. double r = _left->eval_number(c, stack);
  8460. return r == r ? ceil(r) : r;
  8461. }
  8462. case ast_func_round:
  8463. return round_nearest_nzero(_left->eval_number(c, stack));
  8464. case ast_variable:
  8465. {
  8466. assert(_rettype == _data.variable->type());
  8467. if (_rettype == xpath_type_number)
  8468. return _data.variable->get_number();
  8469. // variable needs to be converted to the correct type, this is handled by the fallthrough block below
  8470. break;
  8471. }
  8472. default:
  8473. ;
  8474. }
  8475. // none of the ast types that return the value directly matched, we need to perform type conversion
  8476. switch (_rettype)
  8477. {
  8478. case xpath_type_boolean:
  8479. return eval_boolean(c, stack) ? 1 : 0;
  8480. case xpath_type_string:
  8481. {
  8482. xpath_allocator_capture cr(stack.result);
  8483. return convert_string_to_number(eval_string(c, stack).c_str());
  8484. }
  8485. case xpath_type_node_set:
  8486. {
  8487. xpath_allocator_capture cr(stack.result);
  8488. return convert_string_to_number(eval_string(c, stack).c_str());
  8489. }
  8490. default:
  8491. assert(false && "Wrong expression for return type number"); // unreachable
  8492. return 0;
  8493. }
  8494. }
  8495. xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
  8496. {
  8497. assert(_type == ast_func_concat);
  8498. xpath_allocator_capture ct(stack.temp);
  8499. // count the string number
  8500. size_t count = 1;
  8501. for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
  8502. // allocate a buffer for temporary string objects
  8503. xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
  8504. if (!buffer) return xpath_string();
  8505. // evaluate all strings to temporary stack
  8506. xpath_stack swapped_stack = {stack.temp, stack.result};
  8507. buffer[0] = _left->eval_string(c, swapped_stack);
  8508. size_t pos = 1;
  8509. for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
  8510. assert(pos == count);
  8511. // get total length
  8512. size_t length = 0;
  8513. for (size_t i = 0; i < count; ++i) length += buffer[i].length();
  8514. // create final string
  8515. char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
  8516. if (!result) return xpath_string();
  8517. char_t* ri = result;
  8518. for (size_t j = 0; j < count; ++j)
  8519. for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
  8520. *ri++ = *bi;
  8521. *ri = 0;
  8522. return xpath_string::from_heap_preallocated(result, ri);
  8523. }
  8524. xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
  8525. {
  8526. switch (_type)
  8527. {
  8528. case ast_string_constant:
  8529. return xpath_string::from_const(_data.string);
  8530. case ast_func_local_name_0:
  8531. {
  8532. xpath_node na = c.n;
  8533. return xpath_string::from_const(local_name(na));
  8534. }
  8535. case ast_func_local_name_1:
  8536. {
  8537. xpath_allocator_capture cr(stack.result);
  8538. xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
  8539. xpath_node na = ns.first();
  8540. return xpath_string::from_const(local_name(na));
  8541. }
  8542. case ast_func_name_0:
  8543. {
  8544. xpath_node na = c.n;
  8545. return xpath_string::from_const(qualified_name(na));
  8546. }
  8547. case ast_func_name_1:
  8548. {
  8549. xpath_allocator_capture cr(stack.result);
  8550. xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
  8551. xpath_node na = ns.first();
  8552. return xpath_string::from_const(qualified_name(na));
  8553. }
  8554. case ast_func_namespace_uri_0:
  8555. {
  8556. xpath_node na = c.n;
  8557. return xpath_string::from_const(namespace_uri(na));
  8558. }
  8559. case ast_func_namespace_uri_1:
  8560. {
  8561. xpath_allocator_capture cr(stack.result);
  8562. xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
  8563. xpath_node na = ns.first();
  8564. return xpath_string::from_const(namespace_uri(na));
  8565. }
  8566. case ast_func_string_0:
  8567. return string_value(c.n, stack.result);
  8568. case ast_func_string_1:
  8569. return _left->eval_string(c, stack);
  8570. case ast_func_concat:
  8571. return eval_string_concat(c, stack);
  8572. case ast_func_substring_before:
  8573. {
  8574. xpath_allocator_capture cr(stack.temp);
  8575. xpath_stack swapped_stack = {stack.temp, stack.result};
  8576. xpath_string s = _left->eval_string(c, swapped_stack);
  8577. xpath_string p = _right->eval_string(c, swapped_stack);
  8578. const char_t* pos = find_substring(s.c_str(), p.c_str());
  8579. return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
  8580. }
  8581. case ast_func_substring_after:
  8582. {
  8583. xpath_allocator_capture cr(stack.temp);
  8584. xpath_stack swapped_stack = {stack.temp, stack.result};
  8585. xpath_string s = _left->eval_string(c, swapped_stack);
  8586. xpath_string p = _right->eval_string(c, swapped_stack);
  8587. const char_t* pos = find_substring(s.c_str(), p.c_str());
  8588. if (!pos) return xpath_string();
  8589. const char_t* rbegin = pos + p.length();
  8590. const char_t* rend = s.c_str() + s.length();
  8591. return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
  8592. }
  8593. case ast_func_substring_2:
  8594. {
  8595. xpath_allocator_capture cr(stack.temp);
  8596. xpath_stack swapped_stack = {stack.temp, stack.result};
  8597. xpath_string s = _left->eval_string(c, swapped_stack);
  8598. size_t s_length = s.length();
  8599. double first = round_nearest(_right->eval_number(c, stack));
  8600. if (is_nan(first)) return xpath_string(); // NaN
  8601. else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
  8602. size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
  8603. assert(1 <= pos && pos <= s_length + 1);
  8604. const char_t* rbegin = s.c_str() + (pos - 1);
  8605. const char_t* rend = s.c_str() + s.length();
  8606. return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
  8607. }
  8608. case ast_func_substring_3:
  8609. {
  8610. xpath_allocator_capture cr(stack.temp);
  8611. xpath_stack swapped_stack = {stack.temp, stack.result};
  8612. xpath_string s = _left->eval_string(c, swapped_stack);
  8613. size_t s_length = s.length();
  8614. double first = round_nearest(_right->eval_number(c, stack));
  8615. double last = first + round_nearest(_right->_next->eval_number(c, stack));
  8616. if (is_nan(first) || is_nan(last)) return xpath_string();
  8617. else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
  8618. else if (first >= last) return xpath_string();
  8619. else if (last < 1) return xpath_string();
  8620. size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
  8621. size_t end = last >= static_cast<double>(s_length + 1) ? s_length + 1 : static_cast<size_t>(last);
  8622. assert(1 <= pos && pos <= end && end <= s_length + 1);
  8623. const char_t* rbegin = s.c_str() + (pos - 1);
  8624. const char_t* rend = s.c_str() + (end - 1);
  8625. return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
  8626. }
  8627. case ast_func_normalize_space_0:
  8628. {
  8629. xpath_string s = string_value(c.n, stack.result);
  8630. char_t* begin = s.data(stack.result);
  8631. if (!begin) return xpath_string();
  8632. char_t* end = normalize_space(begin);
  8633. return xpath_string::from_heap_preallocated(begin, end);
  8634. }
  8635. case ast_func_normalize_space_1:
  8636. {
  8637. xpath_string s = _left->eval_string(c, stack);
  8638. char_t* begin = s.data(stack.result);
  8639. if (!begin) return xpath_string();
  8640. char_t* end = normalize_space(begin);
  8641. return xpath_string::from_heap_preallocated(begin, end);
  8642. }
  8643. case ast_func_translate:
  8644. {
  8645. xpath_allocator_capture cr(stack.temp);
  8646. xpath_stack swapped_stack = {stack.temp, stack.result};
  8647. xpath_string s = _left->eval_string(c, stack);
  8648. xpath_string from = _right->eval_string(c, swapped_stack);
  8649. xpath_string to = _right->_next->eval_string(c, swapped_stack);
  8650. char_t* begin = s.data(stack.result);
  8651. if (!begin) return xpath_string();
  8652. char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
  8653. return xpath_string::from_heap_preallocated(begin, end);
  8654. }
  8655. case ast_opt_translate_table:
  8656. {
  8657. xpath_string s = _left->eval_string(c, stack);
  8658. char_t* begin = s.data(stack.result);
  8659. if (!begin) return xpath_string();
  8660. char_t* end = translate_table(begin, _data.table);
  8661. return xpath_string::from_heap_preallocated(begin, end);
  8662. }
  8663. case ast_variable:
  8664. {
  8665. assert(_rettype == _data.variable->type());
  8666. if (_rettype == xpath_type_string)
  8667. return xpath_string::from_const(_data.variable->get_string());
  8668. // variable needs to be converted to the correct type, this is handled by the fallthrough block below
  8669. break;
  8670. }
  8671. default:
  8672. ;
  8673. }
  8674. // none of the ast types that return the value directly matched, we need to perform type conversion
  8675. switch (_rettype)
  8676. {
  8677. case xpath_type_boolean:
  8678. return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
  8679. case xpath_type_number:
  8680. return convert_number_to_string(eval_number(c, stack), stack.result);
  8681. case xpath_type_node_set:
  8682. {
  8683. xpath_allocator_capture cr(stack.temp);
  8684. xpath_stack swapped_stack = {stack.temp, stack.result};
  8685. xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
  8686. return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
  8687. }
  8688. default:
  8689. assert(false && "Wrong expression for return type string"); // unreachable
  8690. return xpath_string();
  8691. }
  8692. }
  8693. xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
  8694. {
  8695. switch (_type)
  8696. {
  8697. case ast_op_union:
  8698. {
  8699. xpath_allocator_capture cr(stack.temp);
  8700. xpath_stack swapped_stack = {stack.temp, stack.result};
  8701. xpath_node_set_raw ls = _left->eval_node_set(c, stack, eval);
  8702. xpath_node_set_raw rs = _right->eval_node_set(c, swapped_stack, eval);
  8703. // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
  8704. ls.set_type(xpath_node_set::type_unsorted);
  8705. ls.append(rs.begin(), rs.end(), stack.result);
  8706. ls.remove_duplicates(stack.temp);
  8707. return ls;
  8708. }
  8709. case ast_filter:
  8710. {
  8711. xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
  8712. // either expression is a number or it contains position() call; sort by document order
  8713. if (_test != predicate_posinv) set.sort_do();
  8714. bool once = eval_once(set.type(), eval);
  8715. apply_predicate(set, 0, stack, once);
  8716. return set;
  8717. }
  8718. case ast_func_id:
  8719. return xpath_node_set_raw();
  8720. case ast_step:
  8721. {
  8722. switch (_axis)
  8723. {
  8724. case axis_ancestor:
  8725. return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
  8726. case axis_ancestor_or_self:
  8727. return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
  8728. case axis_attribute:
  8729. return step_do(c, stack, eval, axis_to_type<axis_attribute>());
  8730. case axis_child:
  8731. return step_do(c, stack, eval, axis_to_type<axis_child>());
  8732. case axis_descendant:
  8733. return step_do(c, stack, eval, axis_to_type<axis_descendant>());
  8734. case axis_descendant_or_self:
  8735. return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
  8736. case axis_following:
  8737. return step_do(c, stack, eval, axis_to_type<axis_following>());
  8738. case axis_following_sibling:
  8739. return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
  8740. case axis_namespace:
  8741. // namespaced axis is not supported
  8742. return xpath_node_set_raw();
  8743. case axis_parent:
  8744. return step_do(c, stack, eval, axis_to_type<axis_parent>());
  8745. case axis_preceding:
  8746. return step_do(c, stack, eval, axis_to_type<axis_preceding>());
  8747. case axis_preceding_sibling:
  8748. return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
  8749. case axis_self:
  8750. return step_do(c, stack, eval, axis_to_type<axis_self>());
  8751. default:
  8752. assert(false && "Unknown axis"); // unreachable
  8753. return xpath_node_set_raw();
  8754. }
  8755. }
  8756. case ast_step_root:
  8757. {
  8758. assert(!_right); // root step can't have any predicates
  8759. xpath_node_set_raw ns;
  8760. ns.set_type(xpath_node_set::type_sorted);
  8761. if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
  8762. else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
  8763. return ns;
  8764. }
  8765. case ast_variable:
  8766. {
  8767. assert(_rettype == _data.variable->type());
  8768. if (_rettype == xpath_type_node_set)
  8769. {
  8770. const xpath_node_set& s = _data.variable->get_node_set();
  8771. xpath_node_set_raw ns;
  8772. ns.set_type(s.type());
  8773. ns.append(s.begin(), s.end(), stack.result);
  8774. return ns;
  8775. }
  8776. // variable needs to be converted to the correct type, this is handled by the fallthrough block below
  8777. break;
  8778. }
  8779. default:
  8780. ;
  8781. }
  8782. // none of the ast types that return the value directly matched, but conversions to node set are invalid
  8783. assert(false && "Wrong expression for return type node set"); // unreachable
  8784. return xpath_node_set_raw();
  8785. }
  8786. void optimize(xpath_allocator* alloc)
  8787. {
  8788. if (_left)
  8789. _left->optimize(alloc);
  8790. if (_right)
  8791. _right->optimize(alloc);
  8792. if (_next)
  8793. _next->optimize(alloc);
  8794. // coverity[var_deref_model]
  8795. optimize_self(alloc);
  8796. }
  8797. void optimize_self(xpath_allocator* alloc)
  8798. {
  8799. // Rewrite [position()=expr] with [expr]
  8800. // Note that this step has to go before classification to recognize [position()=1]
  8801. if ((_type == ast_filter || _type == ast_predicate) &&
  8802. _right && // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
  8803. _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
  8804. {
  8805. _right = _right->_right;
  8806. }
  8807. // Classify filter/predicate ops to perform various optimizations during evaluation
  8808. if ((_type == ast_filter || _type == ast_predicate) && _right) // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
  8809. {
  8810. assert(_test == predicate_default);
  8811. if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
  8812. _test = predicate_constant_one;
  8813. else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
  8814. _test = predicate_constant;
  8815. else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
  8816. _test = predicate_posinv;
  8817. }
  8818. // Rewrite descendant-or-self::node()/child::foo with descendant::foo
  8819. // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
  8820. // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
  8821. // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
  8822. if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) &&
  8823. _left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
  8824. is_posinv_step())
  8825. {
  8826. if (_axis == axis_child || _axis == axis_descendant)
  8827. _axis = axis_descendant;
  8828. else
  8829. _axis = axis_descendant_or_self;
  8830. _left = _left->_left;
  8831. }
  8832. // Use optimized lookup table implementation for translate() with constant arguments
  8833. if (_type == ast_func_translate &&
  8834. _right && // workaround for clang static analyzer (_right is never null for ast_func_translate)
  8835. _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
  8836. {
  8837. unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
  8838. if (table)
  8839. {
  8840. _type = ast_opt_translate_table;
  8841. _data.table = table;
  8842. }
  8843. }
  8844. // Use optimized path for @attr = 'value' or @attr = $value
  8845. if (_type == ast_op_equal &&
  8846. _left && _right && // workaround for clang static analyzer and Coverity (_left and _right are never null for ast_op_equal)
  8847. // coverity[mixed_enums]
  8848. _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
  8849. (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
  8850. {
  8851. _type = ast_opt_compare_attribute;
  8852. }
  8853. }
  8854. bool is_posinv_expr() const
  8855. {
  8856. switch (_type)
  8857. {
  8858. case ast_func_position:
  8859. case ast_func_last:
  8860. return false;
  8861. case ast_string_constant:
  8862. case ast_number_constant:
  8863. case ast_variable:
  8864. return true;
  8865. case ast_step:
  8866. case ast_step_root:
  8867. return true;
  8868. case ast_predicate:
  8869. case ast_filter:
  8870. return true;
  8871. default:
  8872. if (_left && !_left->is_posinv_expr()) return false;
  8873. for (xpath_ast_node* n = _right; n; n = n->_next)
  8874. if (!n->is_posinv_expr()) return false;
  8875. return true;
  8876. }
  8877. }
  8878. bool is_posinv_step() const
  8879. {
  8880. assert(_type == ast_step);
  8881. for (xpath_ast_node* n = _right; n; n = n->_next)
  8882. {
  8883. assert(n->_type == ast_predicate);
  8884. if (n->_test != predicate_posinv)
  8885. return false;
  8886. }
  8887. return true;
  8888. }
  8889. xpath_value_type rettype() const
  8890. {
  8891. return static_cast<xpath_value_type>(_rettype);
  8892. }
  8893. };
  // Upper bound for the parser's nesting counter (_depth); exceeding it makes
  // parsing fail with "Exceeded maximum allowed query depth".
  // Define PUGIXML_XPATH_DEPTH_LIMIT to override the default of 1024.
#ifdef PUGIXML_XPATH_DEPTH_LIMIT
  static const size_t xpath_ast_depth_limit = PUGIXML_XPATH_DEPTH_LIMIT;
#else
  static const size_t xpath_ast_depth_limit = 1024;
#endif
  8901. struct xpath_parser
  8902. {
  // Allocator that backs AST nodes (alloc_node) and copied strings (alloc_string)
  8903. xpath_allocator* _alloc;
  // Token source: the parse_* methods inspect it with current()/contents() and advance it with next()
  8904. xpath_lexer _lexer;
  // Start of the query text; error() subtracts it from the lexer position to compute the error offset
  8905. const char_t* _query;
  // Variable set used to resolve variable references; when null, any variable reference is a parse error
  8906. xpath_variable_set* _variables;
  // Receives the message and offset written by error()
  8907. xpath_parse_result* _result;
  // Scratch buffer handed to get_variable_scratch and convert_string_to_number_scratch
  8908. char_t _scratch[32];
  // Nesting counter that the parse_* methods increment and compare against xpath_ast_depth_limit
  8909. size_t _depth;
  8910. xpath_ast_node* error(const char* message)
  8911. {
  8912. _result->error = message;
  8913. _result->offset = _lexer.current_pos() - _query;
  8914. return 0;
  8915. }
  8916. xpath_ast_node* error_oom()
  8917. {
  8918. assert(_alloc->_error);
  8919. *_alloc->_error = true;
  8920. return 0;
  8921. }
  8922. xpath_ast_node* error_rec()
  8923. {
  8924. return error("Exceeded maximum allowed query depth");
  8925. }
  8926. void* alloc_node()
  8927. {
  8928. return _alloc->allocate(sizeof(xpath_ast_node));
  8929. }
  8930. xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value)
  8931. {
  8932. void* memory = alloc_node();
  8933. return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
  8934. }
  8935. xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value)
  8936. {
  8937. void* memory = alloc_node();
  8938. return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
  8939. }
  8940. xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value)
  8941. {
  8942. void* memory = alloc_node();
  8943. return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
  8944. }
  8945. xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0)
  8946. {
  8947. void* memory = alloc_node();
  8948. return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0;
  8949. }
  8950. xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents)
  8951. {
  8952. void* memory = alloc_node();
  8953. return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0;
  8954. }
  8955. xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test)
  8956. {
  8957. void* memory = alloc_node();
  8958. return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0;
  8959. }
  8960. const char_t* alloc_string(const xpath_lexer_string& value)
  8961. {
  8962. if (!value.begin)
  8963. return PUGIXML_TEXT("");
  8964. size_t length = static_cast<size_t>(value.end - value.begin);
  8965. char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t)));
  8966. if (!c) return 0;
  8967. memcpy(c, value.begin, length * sizeof(char_t));
  8968. c[length] = 0;
  8969. return c;
  8970. }
  8971. xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
  8972. {
  8973. switch (name.begin[0])
  8974. {
  8975. case 'b':
  8976. if (name == PUGIXML_TEXT("boolean") && argc == 1)
  8977. return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]);
  8978. break;
  8979. case 'c':
  8980. if (name == PUGIXML_TEXT("count") && argc == 1)
  8981. {
  8982. if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  8983. return alloc_node(ast_func_count, xpath_type_number, args[0]);
  8984. }
  8985. else if (name == PUGIXML_TEXT("contains") && argc == 2)
  8986. return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
  8987. else if (name == PUGIXML_TEXT("concat") && argc >= 2)
  8988. return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]);
  8989. else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
  8990. return alloc_node(ast_func_ceiling, xpath_type_number, args[0]);
  8991. break;
  8992. case 'f':
  8993. if (name == PUGIXML_TEXT("false") && argc == 0)
  8994. return alloc_node(ast_func_false, xpath_type_boolean);
  8995. else if (name == PUGIXML_TEXT("floor") && argc == 1)
  8996. return alloc_node(ast_func_floor, xpath_type_number, args[0]);
  8997. break;
  8998. case 'i':
  8999. if (name == PUGIXML_TEXT("id") && argc == 1)
  9000. return alloc_node(ast_func_id, xpath_type_node_set, args[0]);
  9001. break;
  9002. case 'l':
  9003. if (name == PUGIXML_TEXT("last") && argc == 0)
  9004. return alloc_node(ast_func_last, xpath_type_number);
  9005. else if (name == PUGIXML_TEXT("lang") && argc == 1)
  9006. return alloc_node(ast_func_lang, xpath_type_boolean, args[0]);
  9007. else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
  9008. {
  9009. if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  9010. return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]);
  9011. }
  9012. break;
  9013. case 'n':
  9014. if (name == PUGIXML_TEXT("name") && argc <= 1)
  9015. {
  9016. if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  9017. return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]);
  9018. }
  9019. else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
  9020. {
  9021. if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  9022. return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]);
  9023. }
  9024. else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
  9025. return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
  9026. else if (name == PUGIXML_TEXT("not") && argc == 1)
  9027. return alloc_node(ast_func_not, xpath_type_boolean, args[0]);
  9028. else if (name == PUGIXML_TEXT("number") && argc <= 1)
  9029. return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
  9030. break;
  9031. case 'p':
  9032. if (name == PUGIXML_TEXT("position") && argc == 0)
  9033. return alloc_node(ast_func_position, xpath_type_number);
  9034. break;
  9035. case 'r':
  9036. if (name == PUGIXML_TEXT("round") && argc == 1)
  9037. return alloc_node(ast_func_round, xpath_type_number, args[0]);
  9038. break;
  9039. case 's':
  9040. if (name == PUGIXML_TEXT("string") && argc <= 1)
  9041. return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
  9042. else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
  9043. return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
  9044. else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
  9045. return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
  9046. else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
  9047. return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
  9048. else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
  9049. return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
  9050. else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
  9051. return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
  9052. else if (name == PUGIXML_TEXT("sum") && argc == 1)
  9053. {
  9054. if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
  9055. return alloc_node(ast_func_sum, xpath_type_number, args[0]);
  9056. }
  9057. break;
  9058. case 't':
  9059. if (name == PUGIXML_TEXT("translate") && argc == 3)
  9060. return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]);
  9061. else if (name == PUGIXML_TEXT("true") && argc == 0)
  9062. return alloc_node(ast_func_true, xpath_type_boolean);
  9063. break;
  9064. default:
  9065. break;
  9066. }
  9067. return error("Unrecognized function or wrong parameter count");
  9068. }
  9069. axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
  9070. {
  9071. specified = true;
  9072. switch (name.begin[0])
  9073. {
  9074. case 'a':
  9075. if (name == PUGIXML_TEXT("ancestor"))
  9076. return axis_ancestor;
  9077. else if (name == PUGIXML_TEXT("ancestor-or-self"))
  9078. return axis_ancestor_or_self;
  9079. else if (name == PUGIXML_TEXT("attribute"))
  9080. return axis_attribute;
  9081. break;
  9082. case 'c':
  9083. if (name == PUGIXML_TEXT("child"))
  9084. return axis_child;
  9085. break;
  9086. case 'd':
  9087. if (name == PUGIXML_TEXT("descendant"))
  9088. return axis_descendant;
  9089. else if (name == PUGIXML_TEXT("descendant-or-self"))
  9090. return axis_descendant_or_self;
  9091. break;
  9092. case 'f':
  9093. if (name == PUGIXML_TEXT("following"))
  9094. return axis_following;
  9095. else if (name == PUGIXML_TEXT("following-sibling"))
  9096. return axis_following_sibling;
  9097. break;
  9098. case 'n':
  9099. if (name == PUGIXML_TEXT("namespace"))
  9100. return axis_namespace;
  9101. break;
  9102. case 'p':
  9103. if (name == PUGIXML_TEXT("parent"))
  9104. return axis_parent;
  9105. else if (name == PUGIXML_TEXT("preceding"))
  9106. return axis_preceding;
  9107. else if (name == PUGIXML_TEXT("preceding-sibling"))
  9108. return axis_preceding_sibling;
  9109. break;
  9110. case 's':
  9111. if (name == PUGIXML_TEXT("self"))
  9112. return axis_self;
  9113. break;
  9114. default:
  9115. break;
  9116. }
  9117. specified = false;
  9118. return axis_child;
  9119. }
  9120. nodetest_t parse_node_test_type(const xpath_lexer_string& name)
  9121. {
  9122. switch (name.begin[0])
  9123. {
  9124. case 'c':
  9125. if (name == PUGIXML_TEXT("comment"))
  9126. return nodetest_type_comment;
  9127. break;
  9128. case 'n':
  9129. if (name == PUGIXML_TEXT("node"))
  9130. return nodetest_type_node;
  9131. break;
  9132. case 'p':
  9133. if (name == PUGIXML_TEXT("processing-instruction"))
  9134. return nodetest_type_pi;
  9135. break;
  9136. case 't':
  9137. if (name == PUGIXML_TEXT("text"))
  9138. return nodetest_type_text;
  9139. break;
  9140. default:
  9141. break;
  9142. }
  9143. return nodetest_none;
  9144. }
  9145. // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
  9146. xpath_ast_node* parse_primary_expression()
  9147. {
  9148. switch (_lexer.current())
  9149. {
  9150. case lex_var_ref:
  9151. {
  9152. xpath_lexer_string name = _lexer.contents();
  9153. if (!_variables)
  9154. return error("Unknown variable: variable set is not provided");
  9155. xpath_variable* var = 0;
  9156. if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
  9157. return error_oom();
  9158. if (!var)
  9159. return error("Unknown variable: variable set does not contain the given name");
  9160. _lexer.next();
  9161. return alloc_node(ast_variable, var->type(), var);
  9162. }
  9163. case lex_open_brace:
  9164. {
  9165. _lexer.next();
  9166. xpath_ast_node* n = parse_expression();
  9167. if (!n) return 0;
  9168. if (_lexer.current() != lex_close_brace)
  9169. return error("Expected ')' to match an opening '('");
  9170. _lexer.next();
  9171. return n;
  9172. }
  9173. case lex_quoted_string:
  9174. {
  9175. const char_t* value = alloc_string(_lexer.contents());
  9176. if (!value) return 0;
  9177. _lexer.next();
  9178. return alloc_node(ast_string_constant, xpath_type_string, value);
  9179. }
  9180. case lex_number:
  9181. {
  9182. double value = 0;
  9183. if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
  9184. return error_oom();
  9185. _lexer.next();
  9186. return alloc_node(ast_number_constant, xpath_type_number, value);
  9187. }
  9188. case lex_string:
  9189. {
  9190. xpath_ast_node* args[2] = {0};
  9191. size_t argc = 0;
  9192. xpath_lexer_string function = _lexer.contents();
  9193. _lexer.next();
  9194. xpath_ast_node* last_arg = 0;
  9195. if (_lexer.current() != lex_open_brace)
  9196. return error("Unrecognized function call");
  9197. _lexer.next();
  9198. size_t old_depth = _depth;
  9199. while (_lexer.current() != lex_close_brace)
  9200. {
  9201. if (argc > 0)
  9202. {
  9203. if (_lexer.current() != lex_comma)
  9204. return error("No comma between function arguments");
  9205. _lexer.next();
  9206. }
  9207. if (++_depth > xpath_ast_depth_limit)
  9208. return error_rec();
  9209. xpath_ast_node* n = parse_expression();
  9210. if (!n) return 0;
  9211. if (argc < 2) args[argc] = n;
  9212. else last_arg->set_next(n);
  9213. argc++;
  9214. last_arg = n;
  9215. }
  9216. _lexer.next();
  9217. _depth = old_depth;
  9218. return parse_function(function, argc, args);
  9219. }
  9220. default:
  9221. return error("Unrecognizable primary expression");
  9222. }
  9223. }
  9224. // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
  9225. // Predicate ::= '[' PredicateExpr ']'
  9226. // PredicateExpr ::= Expr
  9227. xpath_ast_node* parse_filter_expression()
  9228. {
  9229. xpath_ast_node* n = parse_primary_expression();
  9230. if (!n) return 0;
  9231. size_t old_depth = _depth;
  9232. while (_lexer.current() == lex_open_square_brace)
  9233. {
  9234. _lexer.next();
  9235. if (++_depth > xpath_ast_depth_limit)
  9236. return error_rec();
  9237. if (n->rettype() != xpath_type_node_set)
  9238. return error("Predicate has to be applied to node set");
  9239. xpath_ast_node* expr = parse_expression();
  9240. if (!expr) return 0;
  9241. n = alloc_node(ast_filter, n, expr, predicate_default);
  9242. if (!n) return 0;
  9243. if (_lexer.current() != lex_close_square_brace)
  9244. return error("Expected ']' to match an opening '['");
  9245. _lexer.next();
  9246. }
  9247. _depth = old_depth;
  9248. return n;
  9249. }
  9250. // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
  9251. // AxisSpecifier ::= AxisName '::' | '@'?
  9252. // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
  9253. // NameTest ::= '*' | NCName ':' '*' | QName
  9254. // AbbreviatedStep ::= '.' | '..'
  9255. xpath_ast_node* parse_step(xpath_ast_node* set)
  9256. {
  9257. if (set && set->rettype() != xpath_type_node_set)
  9258. return error("Step has to be applied to node set");
  9259. bool axis_specified = false;
  9260. axis_t axis = axis_child; // implied child axis
  9261. if (_lexer.current() == lex_axis_attribute)
  9262. {
  9263. axis = axis_attribute;
  9264. axis_specified = true;
  9265. _lexer.next();
  9266. }
  9267. else if (_lexer.current() == lex_dot)
  9268. {
  9269. _lexer.next();
  9270. if (_lexer.current() == lex_open_square_brace)
  9271. return error("Predicates are not allowed after an abbreviated step");
  9272. return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0);
  9273. }
  9274. else if (_lexer.current() == lex_double_dot)
  9275. {
  9276. _lexer.next();
  9277. if (_lexer.current() == lex_open_square_brace)
  9278. return error("Predicates are not allowed after an abbreviated step");
  9279. return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0);
  9280. }
  9281. nodetest_t nt_type = nodetest_none;
  9282. xpath_lexer_string nt_name;
  9283. if (_lexer.current() == lex_string)
  9284. {
  9285. // node name test
  9286. nt_name = _lexer.contents();
  9287. _lexer.next();
  9288. // was it an axis name?
  9289. if (_lexer.current() == lex_double_colon)
  9290. {
  9291. // parse axis name
  9292. if (axis_specified)
  9293. return error("Two axis specifiers in one step");
  9294. axis = parse_axis_name(nt_name, axis_specified);
  9295. if (!axis_specified)
  9296. return error("Unknown axis");
  9297. // read actual node test
  9298. _lexer.next();
  9299. if (_lexer.current() == lex_multiply)
  9300. {
  9301. nt_type = nodetest_all;
  9302. nt_name = xpath_lexer_string();
  9303. _lexer.next();
  9304. }
  9305. else if (_lexer.current() == lex_string)
  9306. {
  9307. nt_name = _lexer.contents();
  9308. _lexer.next();
  9309. }
  9310. else
  9311. {
  9312. return error("Unrecognized node test");
  9313. }
  9314. }
  9315. if (nt_type == nodetest_none)
  9316. {
  9317. // node type test or processing-instruction
  9318. if (_lexer.current() == lex_open_brace)
  9319. {
  9320. _lexer.next();
  9321. if (_lexer.current() == lex_close_brace)
  9322. {
  9323. _lexer.next();
  9324. nt_type = parse_node_test_type(nt_name);
  9325. if (nt_type == nodetest_none)
  9326. return error("Unrecognized node type");
  9327. nt_name = xpath_lexer_string();
  9328. }
  9329. else if (nt_name == PUGIXML_TEXT("processing-instruction"))
  9330. {
  9331. if (_lexer.current() != lex_quoted_string)
  9332. return error("Only literals are allowed as arguments to processing-instruction()");
  9333. nt_type = nodetest_pi;
  9334. nt_name = _lexer.contents();
  9335. _lexer.next();
  9336. if (_lexer.current() != lex_close_brace)
  9337. return error("Unmatched brace near processing-instruction()");
  9338. _lexer.next();
  9339. }
  9340. else
  9341. {
  9342. return error("Unmatched brace near node type test");
  9343. }
  9344. }
  9345. // QName or NCName:*
  9346. else
  9347. {
  9348. if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
  9349. {
  9350. nt_name.end--; // erase *
  9351. nt_type = nodetest_all_in_namespace;
  9352. }
  9353. else
  9354. {
  9355. nt_type = nodetest_name;
  9356. }
  9357. }
  9358. }
  9359. }
  9360. else if (_lexer.current() == lex_multiply)
  9361. {
  9362. nt_type = nodetest_all;
  9363. _lexer.next();
  9364. }
  9365. else
  9366. {
  9367. return error("Unrecognized node test");
  9368. }
  9369. const char_t* nt_name_copy = alloc_string(nt_name);
  9370. if (!nt_name_copy) return 0;
  9371. xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
  9372. if (!n) return 0;
  9373. size_t old_depth = _depth;
  9374. xpath_ast_node* last = 0;
  9375. while (_lexer.current() == lex_open_square_brace)
  9376. {
  9377. _lexer.next();
  9378. if (++_depth > xpath_ast_depth_limit)
  9379. return error_rec();
  9380. xpath_ast_node* expr = parse_expression();
  9381. if (!expr) return 0;
  9382. xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default);
  9383. if (!pred) return 0;
  9384. if (_lexer.current() != lex_close_square_brace)
  9385. return error("Expected ']' to match an opening '['");
  9386. _lexer.next();
  9387. if (last) last->set_next(pred);
  9388. else n->set_right(pred);
  9389. last = pred;
  9390. }
  9391. _depth = old_depth;
  9392. return n;
  9393. }
  9394. // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
  9395. xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
  9396. {
  9397. xpath_ast_node* n = parse_step(set);
  9398. if (!n) return 0;
  9399. size_t old_depth = _depth;
  9400. while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
  9401. {
  9402. lexeme_t l = _lexer.current();
  9403. _lexer.next();
  9404. if (l == lex_double_slash)
  9405. {
  9406. n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
  9407. if (!n) return 0;
  9408. ++_depth;
  9409. }
  9410. if (++_depth > xpath_ast_depth_limit)
  9411. return error_rec();
  9412. n = parse_step(n);
  9413. if (!n) return 0;
  9414. }
  9415. _depth = old_depth;
  9416. return n;
  9417. }
  9418. // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
  9419. // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
  9420. xpath_ast_node* parse_location_path()
  9421. {
  9422. if (_lexer.current() == lex_slash)
  9423. {
  9424. _lexer.next();
  9425. xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
  9426. if (!n) return 0;
  9427. // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
  9428. lexeme_t l = _lexer.current();
  9429. if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
  9430. return parse_relative_location_path(n);
  9431. else
  9432. return n;
  9433. }
  9434. else if (_lexer.current() == lex_double_slash)
  9435. {
  9436. _lexer.next();
  9437. xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
  9438. if (!n) return 0;
  9439. n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
  9440. if (!n) return 0;
  9441. return parse_relative_location_path(n);
  9442. }
  9443. // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
  9444. return parse_relative_location_path(0);
  9445. }
  9446. // PathExpr ::= LocationPath
  9447. // | FilterExpr
  9448. // | FilterExpr '/' RelativeLocationPath
  9449. // | FilterExpr '//' RelativeLocationPath
  9450. // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
  9451. // UnaryExpr ::= UnionExpr | '-' UnaryExpr
  9452. xpath_ast_node* parse_path_or_unary_expression()
  9453. {
  9454. // Clarification.
  9455. // PathExpr begins with either LocationPath or FilterExpr.
  9456. // FilterExpr begins with PrimaryExpr
  9457. // PrimaryExpr begins with '$' in case of it being a variable reference,
  9458. // '(' in case of it being an expression, string literal, number constant or
  9459. // function call.
  9460. if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
  9461. _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
  9462. _lexer.current() == lex_string)
  9463. {
  9464. if (_lexer.current() == lex_string)
  9465. {
  9466. // This is either a function call, or not - if not, we shall proceed with location path
  9467. const char_t* state = _lexer.state();
  9468. while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
  9469. if (*state != '(')
  9470. return parse_location_path();
  9471. // This looks like a function call; however this still can be a node-test. Check it.
  9472. if (parse_node_test_type(_lexer.contents()) != nodetest_none)
  9473. return parse_location_path();
  9474. }
  9475. xpath_ast_node* n = parse_filter_expression();
  9476. if (!n) return 0;
  9477. if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
  9478. {
  9479. lexeme_t l = _lexer.current();
  9480. _lexer.next();
  9481. if (l == lex_double_slash)
  9482. {
  9483. if (n->rettype() != xpath_type_node_set)
  9484. return error("Step has to be applied to node set");
  9485. n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
  9486. if (!n) return 0;
  9487. }
  9488. // select from location path
  9489. return parse_relative_location_path(n);
  9490. }
  9491. return n;
  9492. }
  9493. else if (_lexer.current() == lex_minus)
  9494. {
  9495. _lexer.next();
  9496. // precedence 7+ - only parses union expressions
  9497. xpath_ast_node* n = parse_expression(7);
  9498. if (!n) return 0;
  9499. return alloc_node(ast_op_negate, xpath_type_number, n);
  9500. }
  9501. else
  9502. {
  9503. return parse_location_path();
  9504. }
  9505. }
  9506. struct binary_op_t
  9507. {
  9508. ast_type_t asttype;
  9509. xpath_value_type rettype;
  9510. int precedence;
  9511. binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
  9512. {
  9513. }
  9514. binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
  9515. {
  9516. }
  9517. static binary_op_t parse(xpath_lexer& lexer)
  9518. {
  9519. switch (lexer.current())
  9520. {
  9521. case lex_string:
  9522. if (lexer.contents() == PUGIXML_TEXT("or"))
  9523. return binary_op_t(ast_op_or, xpath_type_boolean, 1);
  9524. else if (lexer.contents() == PUGIXML_TEXT("and"))
  9525. return binary_op_t(ast_op_and, xpath_type_boolean, 2);
  9526. else if (lexer.contents() == PUGIXML_TEXT("div"))
  9527. return binary_op_t(ast_op_divide, xpath_type_number, 6);
  9528. else if (lexer.contents() == PUGIXML_TEXT("mod"))
  9529. return binary_op_t(ast_op_mod, xpath_type_number, 6);
  9530. else
  9531. return binary_op_t();
  9532. case lex_equal:
  9533. return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
  9534. case lex_not_equal:
  9535. return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
  9536. case lex_less:
  9537. return binary_op_t(ast_op_less, xpath_type_boolean, 4);
  9538. case lex_greater:
  9539. return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
  9540. case lex_less_or_equal:
  9541. return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
  9542. case lex_greater_or_equal:
  9543. return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
  9544. case lex_plus:
  9545. return binary_op_t(ast_op_add, xpath_type_number, 5);
  9546. case lex_minus:
  9547. return binary_op_t(ast_op_subtract, xpath_type_number, 5);
  9548. case lex_multiply:
  9549. return binary_op_t(ast_op_multiply, xpath_type_number, 6);
  9550. case lex_union:
  9551. return binary_op_t(ast_op_union, xpath_type_node_set, 7);
  9552. default:
  9553. return binary_op_t();
  9554. }
  9555. }
  9556. };
  9557. xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
  9558. {
  9559. binary_op_t op = binary_op_t::parse(_lexer);
  9560. while (op.asttype != ast_unknown && op.precedence >= limit)
  9561. {
  9562. _lexer.next();
  9563. if (++_depth > xpath_ast_depth_limit)
  9564. return error_rec();
  9565. xpath_ast_node* rhs = parse_path_or_unary_expression();
  9566. if (!rhs) return 0;
  9567. binary_op_t nextop = binary_op_t::parse(_lexer);
  9568. while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
  9569. {
  9570. rhs = parse_expression_rec(rhs, nextop.precedence);
  9571. if (!rhs) return 0;
  9572. nextop = binary_op_t::parse(_lexer);
  9573. }
  9574. if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
  9575. return error("Union operator has to be applied to node sets");
  9576. lhs = alloc_node(op.asttype, op.rettype, lhs, rhs);
  9577. if (!lhs) return 0;
  9578. op = binary_op_t::parse(_lexer);
  9579. }
  9580. return lhs;
  9581. }
  9582. // Expr ::= OrExpr
  9583. // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
  9584. // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
  9585. // EqualityExpr ::= RelationalExpr
  9586. // | EqualityExpr '=' RelationalExpr
  9587. // | EqualityExpr '!=' RelationalExpr
  9588. // RelationalExpr ::= AdditiveExpr
  9589. // | RelationalExpr '<' AdditiveExpr
  9590. // | RelationalExpr '>' AdditiveExpr
  9591. // | RelationalExpr '<=' AdditiveExpr
  9592. // | RelationalExpr '>=' AdditiveExpr
  9593. // AdditiveExpr ::= MultiplicativeExpr
  9594. // | AdditiveExpr '+' MultiplicativeExpr
  9595. // | AdditiveExpr '-' MultiplicativeExpr
  9596. // MultiplicativeExpr ::= UnaryExpr
  9597. // | MultiplicativeExpr '*' UnaryExpr
  9598. // | MultiplicativeExpr 'div' UnaryExpr
  9599. // | MultiplicativeExpr 'mod' UnaryExpr
  9600. xpath_ast_node* parse_expression(int limit = 0)
  9601. {
  9602. size_t old_depth = _depth;
  9603. if (++_depth > xpath_ast_depth_limit)
  9604. return error_rec();
  9605. xpath_ast_node* n = parse_path_or_unary_expression();
  9606. if (!n) return 0;
  9607. n = parse_expression_rec(n, limit);
  9608. _depth = old_depth;
  9609. return n;
  9610. }
  9611. xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0)
  9612. {
  9613. }
  9614. xpath_ast_node* parse()
  9615. {
  9616. xpath_ast_node* n = parse_expression();
  9617. if (!n) return 0;
  9618. assert(_depth == 0);
  9619. // check if there are unparsed tokens left
  9620. if (_lexer.current() != lex_eof)
  9621. return error("Incorrect query");
  9622. return n;
  9623. }
  9624. static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
  9625. {
  9626. xpath_parser parser(query, variables, alloc, result);
  9627. return parser.parse();
  9628. }
  9629. };
  9630. struct xpath_query_impl
  9631. {
  9632. static xpath_query_impl* create()
  9633. {
  9634. void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
  9635. if (!memory) return 0;
  9636. return new (memory) xpath_query_impl();
  9637. }
  9638. static void destroy(xpath_query_impl* impl)
  9639. {
  9640. // free all allocated pages
  9641. impl->alloc.release();
  9642. // free allocator memory (with the first page)
  9643. xml_memory::deallocate(impl);
  9644. }
  9645. xpath_query_impl(): root(0), alloc(&block, &oom), oom(false)
  9646. {
  9647. block.next = 0;
  9648. block.capacity = sizeof(block.data);
  9649. }
  9650. xpath_ast_node* root;
  9651. xpath_allocator alloc;
  9652. xpath_memory_block block;
  9653. bool oom;
  9654. };
  9655. PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
  9656. {
  9657. if (!impl) return 0;
  9658. if (impl->root->rettype() != xpath_type_node_set)
  9659. {
  9660. #ifdef PUGIXML_NO_EXCEPTIONS
  9661. return 0;
  9662. #else
  9663. xpath_parse_result res;
  9664. res.error = "Expression does not evaluate to node set";
  9665. throw xpath_exception(res);
  9666. #endif
  9667. }
  9668. return impl->root;
  9669. }
  9670. PUGI__NS_END
  9671. namespace pugi
  9672. {
  9673. #ifndef PUGIXML_NO_EXCEPTIONS
  9674. PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
  9675. {
  9676. assert(_result.error);
  9677. }
  9678. PUGI__FN const char* xpath_exception::what() const throw()
  9679. {
  9680. return _result.error;
  9681. }
  9682. PUGI__FN const xpath_parse_result& xpath_exception::result() const
  9683. {
  9684. return _result;
  9685. }
  9686. #endif
  9687. PUGI__FN xpath_node::xpath_node()
  9688. {
  9689. }
  9690. PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
  9691. {
  9692. }
  9693. PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
  9694. {
  9695. }
  9696. PUGI__FN xml_node xpath_node::node() const
  9697. {
  9698. return _attribute ? xml_node() : _node;
  9699. }
  9700. PUGI__FN xml_attribute xpath_node::attribute() const
  9701. {
  9702. return _attribute;
  9703. }
  9704. PUGI__FN xml_node xpath_node::parent() const
  9705. {
  9706. return _attribute ? _node : _node.parent();
  9707. }
  9708. PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
  9709. {
  9710. }
  9711. PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
  9712. {
  9713. return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
  9714. }
  9715. PUGI__FN bool xpath_node::operator!() const
  9716. {
  9717. return !(_node || _attribute);
  9718. }
  9719. PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
  9720. {
  9721. return _node == n._node && _attribute == n._attribute;
  9722. }
  9723. PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
  9724. {
  9725. return _node != n._node || _attribute != n._attribute;
  9726. }
  9727. #ifdef __BORLANDC__
  9728. PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
  9729. {
  9730. return (bool)lhs && rhs;
  9731. }
  9732. PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
  9733. {
  9734. return (bool)lhs || rhs;
  9735. }
  9736. #endif
  9737. PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
  9738. {
  9739. assert(begin_ <= end_);
  9740. size_t size_ = static_cast<size_t>(end_ - begin_);
  9741. // use internal buffer for 0 or 1 elements, heap buffer otherwise
  9742. xpath_node* storage = (size_ <= 1) ? _storage : static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
  9743. if (!storage)
  9744. {
  9745. #ifdef PUGIXML_NO_EXCEPTIONS
  9746. return;
  9747. #else
  9748. throw std::bad_alloc();
  9749. #endif
  9750. }
  9751. // deallocate old buffer
  9752. if (_begin != _storage)
  9753. impl::xml_memory::deallocate(_begin);
  9754. // size check is necessary because for begin_ = end_ = nullptr, memcpy is UB
  9755. if (size_)
  9756. memcpy(storage, begin_, size_ * sizeof(xpath_node));
  9757. _begin = storage;
  9758. _end = storage + size_;
  9759. _type = type_;
  9760. }
  9761. #ifdef PUGIXML_HAS_MOVE
  9762. PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT
  9763. {
  9764. _type = rhs._type;
  9765. _storage[0] = rhs._storage[0];
  9766. _begin = (rhs._begin == rhs._storage) ? _storage : rhs._begin;
  9767. _end = _begin + (rhs._end - rhs._begin);
  9768. rhs._type = type_unsorted;
  9769. rhs._begin = rhs._storage;
  9770. rhs._end = rhs._storage;
  9771. }
  9772. #endif
  9773. PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(_storage), _end(_storage)
  9774. {
  9775. }
  9776. PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(_storage), _end(_storage)
  9777. {
  9778. _assign(begin_, end_, type_);
  9779. }
  9780. PUGI__FN xpath_node_set::~xpath_node_set()
  9781. {
  9782. if (_begin != _storage)
  9783. impl::xml_memory::deallocate(_begin);
  9784. }
  9785. PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(_storage), _end(_storage)
  9786. {
  9787. _assign(ns._begin, ns._end, ns._type);
  9788. }
  9789. PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
  9790. {
  9791. if (this == &ns) return *this;
  9792. _assign(ns._begin, ns._end, ns._type);
  9793. return *this;
  9794. }
  9795. #ifdef PUGIXML_HAS_MOVE
  9796. PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(_storage), _end(_storage)
  9797. {
  9798. _move(rhs);
  9799. }
  9800. PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT
  9801. {
  9802. if (this == &rhs) return *this;
  9803. if (_begin != _storage)
  9804. impl::xml_memory::deallocate(_begin);
  9805. _move(rhs);
  9806. return *this;
  9807. }
  9808. #endif
  9809. PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
  9810. {
  9811. return _type;
  9812. }
  9813. PUGI__FN size_t xpath_node_set::size() const
  9814. {
  9815. return _end - _begin;
  9816. }
  9817. PUGI__FN bool xpath_node_set::empty() const
  9818. {
  9819. return _begin == _end;
  9820. }
  9821. PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
  9822. {
  9823. assert(index < size());
  9824. return _begin[index];
  9825. }
  9826. PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
  9827. {
  9828. return _begin;
  9829. }
  9830. PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
  9831. {
  9832. return _end;
  9833. }
  9834. PUGI__FN void xpath_node_set::sort(bool reverse)
  9835. {
  9836. _type = impl::xpath_sort(_begin, _end, _type, reverse);
  9837. }
  9838. PUGI__FN xpath_node xpath_node_set::first() const
  9839. {
  9840. return impl::xpath_first(_begin, _end, _type);
  9841. }
  9842. PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
  9843. {
  9844. }
  9845. PUGI__FN xpath_parse_result::operator bool() const
  9846. {
  9847. return error == 0;
  9848. }
  9849. PUGI__FN const char* xpath_parse_result::description() const
  9850. {
  9851. return error ? error : "No error";
  9852. }
  9853. PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
  9854. {
  9855. }
  9856. PUGI__FN const char_t* xpath_variable::name() const
  9857. {
  9858. switch (_type)
  9859. {
  9860. case xpath_type_node_set:
  9861. return static_cast<const impl::xpath_variable_node_set*>(this)->name;
  9862. case xpath_type_number:
  9863. return static_cast<const impl::xpath_variable_number*>(this)->name;
  9864. case xpath_type_string:
  9865. return static_cast<const impl::xpath_variable_string*>(this)->name;
  9866. case xpath_type_boolean:
  9867. return static_cast<const impl::xpath_variable_boolean*>(this)->name;
  9868. default:
  9869. assert(false && "Invalid variable type"); // unreachable
  9870. return 0;
  9871. }
  9872. }
  9873. PUGI__FN xpath_value_type xpath_variable::type() const
  9874. {
  9875. return _type;
  9876. }
  9877. PUGI__FN bool xpath_variable::get_boolean() const
  9878. {
  9879. return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
  9880. }
  9881. PUGI__FN double xpath_variable::get_number() const
  9882. {
  9883. return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
  9884. }
  9885. PUGI__FN const char_t* xpath_variable::get_string() const
  9886. {
  9887. const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
  9888. return value ? value : PUGIXML_TEXT("");
  9889. }
  9890. PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
  9891. {
  9892. return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
  9893. }
  9894. PUGI__FN bool xpath_variable::set(bool value)
  9895. {
  9896. if (_type != xpath_type_boolean) return false;
  9897. static_cast<impl::xpath_variable_boolean*>(this)->value = value;
  9898. return true;
  9899. }
  9900. PUGI__FN bool xpath_variable::set(double value)
  9901. {
  9902. if (_type != xpath_type_number) return false;
  9903. static_cast<impl::xpath_variable_number*>(this)->value = value;
  9904. return true;
  9905. }
  9906. PUGI__FN bool xpath_variable::set(const char_t* value)
  9907. {
  9908. if (_type != xpath_type_string) return false;
  9909. impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
  9910. // duplicate string
  9911. size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
  9912. char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
  9913. if (!copy) return false;
  9914. memcpy(copy, value, size);
  9915. // replace old string
  9916. if (var->value) impl::xml_memory::deallocate(var->value);
  9917. var->value = copy;
  9918. return true;
  9919. }
  9920. PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
  9921. {
  9922. if (_type != xpath_type_node_set) return false;
  9923. static_cast<impl::xpath_variable_node_set*>(this)->value = value;
  9924. return true;
  9925. }
  9926. PUGI__FN xpath_variable_set::xpath_variable_set()
  9927. {
  9928. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9929. _data[i] = 0;
  9930. }
  9931. PUGI__FN xpath_variable_set::~xpath_variable_set()
  9932. {
  9933. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9934. _destroy(_data[i]);
  9935. }
  9936. PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
  9937. {
  9938. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9939. _data[i] = 0;
  9940. _assign(rhs);
  9941. }
  9942. PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
  9943. {
  9944. if (this == &rhs) return *this;
  9945. _assign(rhs);
  9946. return *this;
  9947. }
  9948. #ifdef PUGIXML_HAS_MOVE
  9949. PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
  9950. {
  9951. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9952. {
  9953. _data[i] = rhs._data[i];
  9954. rhs._data[i] = 0;
  9955. }
  9956. }
  9957. PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
  9958. {
  9959. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9960. {
  9961. _destroy(_data[i]);
  9962. _data[i] = rhs._data[i];
  9963. rhs._data[i] = 0;
  9964. }
  9965. return *this;
  9966. }
  9967. #endif
  9968. PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
  9969. {
  9970. xpath_variable_set temp;
  9971. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9972. if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
  9973. return;
  9974. _swap(temp);
  9975. }
  9976. PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
  9977. {
  9978. for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
  9979. {
  9980. xpath_variable* chain = _data[i];
  9981. _data[i] = rhs._data[i];
  9982. rhs._data[i] = chain;
  9983. }
  9984. }
  9985. PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
  9986. {
  9987. const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
  9988. size_t hash = impl::hash_string(name) % hash_size;
  9989. // look for existing variable
  9990. for (xpath_variable* var = _data[hash]; var; var = var->_next)
  9991. if (impl::strequal(var->name(), name))
  9992. return var;
  9993. return 0;
  9994. }
  9995. PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
  9996. {
  9997. xpath_variable* last = 0;
  9998. while (var)
  9999. {
  10000. // allocate storage for new variable
  10001. xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
  10002. if (!nvar) return false;
  10003. // link the variable to the result immediately to handle failures gracefully
  10004. if (last)
  10005. last->_next = nvar;
  10006. else
  10007. *out_result = nvar;
  10008. last = nvar;
  10009. // copy the value; this can fail due to out-of-memory conditions
  10010. if (!impl::copy_xpath_variable(nvar, var)) return false;
  10011. var = var->_next;
  10012. }
  10013. return true;
  10014. }
  10015. PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
  10016. {
  10017. while (var)
  10018. {
  10019. xpath_variable* next = var->_next;
  10020. impl::delete_xpath_variable(var->_type, var);
  10021. var = next;
  10022. }
  10023. }
  10024. PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
  10025. {
  10026. const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
  10027. size_t hash = impl::hash_string(name) % hash_size;
  10028. // look for existing variable
  10029. for (xpath_variable* var = _data[hash]; var; var = var->_next)
  10030. if (impl::strequal(var->name(), name))
  10031. return var->type() == type ? var : 0;
  10032. // add new variable
  10033. xpath_variable* result = impl::new_xpath_variable(type, name);
  10034. if (result)
  10035. {
  10036. result->_next = _data[hash];
  10037. _data[hash] = result;
  10038. }
  10039. return result;
  10040. }
  10041. PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
  10042. {
  10043. xpath_variable* var = add(name, xpath_type_boolean);
  10044. return var ? var->set(value) : false;
  10045. }
  10046. PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
  10047. {
  10048. xpath_variable* var = add(name, xpath_type_number);
  10049. return var ? var->set(value) : false;
  10050. }
  10051. PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
  10052. {
  10053. xpath_variable* var = add(name, xpath_type_string);
  10054. return var ? var->set(value) : false;
  10055. }
  10056. PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
  10057. {
  10058. xpath_variable* var = add(name, xpath_type_node_set);
  10059. return var ? var->set(value) : false;
  10060. }
  10061. PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
  10062. {
  10063. return _find(name);
  10064. }
  10065. PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
  10066. {
  10067. return _find(name);
  10068. }
  10069. PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
  10070. {
  10071. impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
  10072. if (!qimpl)
  10073. {
  10074. #ifdef PUGIXML_NO_EXCEPTIONS
  10075. _result.error = "Out of memory";
  10076. #else
  10077. throw std::bad_alloc();
  10078. #endif
  10079. }
  10080. else
  10081. {
  10082. using impl::auto_deleter; // MSVC7 workaround
  10083. auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
  10084. qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
  10085. if (qimpl->root)
  10086. {
  10087. qimpl->root->optimize(&qimpl->alloc);
  10088. _impl = impl.release();
  10089. _result.error = 0;
  10090. }
  10091. else
  10092. {
  10093. #ifdef PUGIXML_NO_EXCEPTIONS
  10094. if (qimpl->oom) _result.error = "Out of memory";
  10095. #else
  10096. if (qimpl->oom) throw std::bad_alloc();
  10097. throw xpath_exception(_result);
  10098. #endif
  10099. }
  10100. }
  10101. }
  10102. PUGI__FN xpath_query::xpath_query(): _impl(0)
  10103. {
  10104. }
  10105. PUGI__FN xpath_query::~xpath_query()
  10106. {
  10107. if (_impl)
  10108. impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
  10109. }
  10110. #ifdef PUGIXML_HAS_MOVE
  10111. PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT
  10112. {
  10113. _impl = rhs._impl;
  10114. _result = rhs._result;
  10115. rhs._impl = 0;
  10116. rhs._result = xpath_parse_result();
  10117. }
  10118. PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT
  10119. {
  10120. if (this == &rhs) return *this;
  10121. if (_impl)
  10122. impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
  10123. _impl = rhs._impl;
  10124. _result = rhs._result;
  10125. rhs._impl = 0;
  10126. rhs._result = xpath_parse_result();
  10127. return *this;
  10128. }
  10129. #endif
  10130. PUGI__FN xpath_value_type xpath_query::return_type() const
  10131. {
  10132. if (!_impl) return xpath_type_none;
  10133. return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
  10134. }
  10135. PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
  10136. {
  10137. if (!_impl) return false;
  10138. impl::xpath_context c(n, 1, 1);
  10139. impl::xpath_stack_data sd;
  10140. bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
  10141. if (sd.oom)
  10142. {
  10143. #ifdef PUGIXML_NO_EXCEPTIONS
  10144. return false;
  10145. #else
  10146. throw std::bad_alloc();
  10147. #endif
  10148. }
  10149. return r;
  10150. }
  10151. PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
  10152. {
  10153. if (!_impl) return impl::gen_nan();
  10154. impl::xpath_context c(n, 1, 1);
  10155. impl::xpath_stack_data sd;
  10156. double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
  10157. if (sd.oom)
  10158. {
  10159. #ifdef PUGIXML_NO_EXCEPTIONS
  10160. return impl::gen_nan();
  10161. #else
  10162. throw std::bad_alloc();
  10163. #endif
  10164. }
  10165. return r;
  10166. }
  10167. #ifndef PUGIXML_NO_STL
  10168. PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
  10169. {
  10170. if (!_impl) return string_t();
  10171. impl::xpath_context c(n, 1, 1);
  10172. impl::xpath_stack_data sd;
  10173. impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack);
  10174. if (sd.oom)
  10175. {
  10176. #ifdef PUGIXML_NO_EXCEPTIONS
  10177. return string_t();
  10178. #else
  10179. throw std::bad_alloc();
  10180. #endif
  10181. }
  10182. return string_t(r.c_str(), r.length());
  10183. }
  10184. #endif
  10185. PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
  10186. {
  10187. impl::xpath_context c(n, 1, 1);
  10188. impl::xpath_stack_data sd;
  10189. impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string();
  10190. if (sd.oom)
  10191. {
  10192. #ifdef PUGIXML_NO_EXCEPTIONS
  10193. r = impl::xpath_string();
  10194. #else
  10195. throw std::bad_alloc();
  10196. #endif
  10197. }
  10198. size_t full_size = r.length() + 1;
  10199. if (capacity > 0)
  10200. {
  10201. size_t size = (full_size < capacity) ? full_size : capacity;
  10202. assert(size > 0);
  10203. memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
  10204. buffer[size - 1] = 0;
  10205. }
  10206. return full_size;
  10207. }
  10208. PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
  10209. {
  10210. impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
  10211. if (!root) return xpath_node_set();
  10212. impl::xpath_context c(n, 1, 1);
  10213. impl::xpath_stack_data sd;
  10214. impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
  10215. if (sd.oom)
  10216. {
  10217. #ifdef PUGIXML_NO_EXCEPTIONS
  10218. return xpath_node_set();
  10219. #else
  10220. throw std::bad_alloc();
  10221. #endif
  10222. }
  10223. return xpath_node_set(r.begin(), r.end(), r.type());
  10224. }
  10225. PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
  10226. {
  10227. impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
  10228. if (!root) return xpath_node();
  10229. impl::xpath_context c(n, 1, 1);
  10230. impl::xpath_stack_data sd;
  10231. impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
  10232. if (sd.oom)
  10233. {
  10234. #ifdef PUGIXML_NO_EXCEPTIONS
  10235. return xpath_node();
  10236. #else
  10237. throw std::bad_alloc();
  10238. #endif
  10239. }
  10240. return r.first();
  10241. }
  10242. PUGI__FN const xpath_parse_result& xpath_query::result() const
  10243. {
  10244. return _result;
  10245. }
  10246. PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
  10247. {
  10248. }
  10249. PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
  10250. {
  10251. return _impl ? unspecified_bool_xpath_query : 0;
  10252. }
  10253. PUGI__FN bool xpath_query::operator!() const
  10254. {
  10255. return !_impl;
  10256. }
  10257. PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
  10258. {
  10259. xpath_query q(query, variables);
  10260. return q.evaluate_node(*this);
  10261. }
  10262. PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
  10263. {
  10264. return query.evaluate_node(*this);
  10265. }
  10266. PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
  10267. {
  10268. xpath_query q(query, variables);
  10269. return q.evaluate_node_set(*this);
  10270. }
  10271. PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
  10272. {
  10273. return query.evaluate_node_set(*this);
  10274. }
  10275. PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
  10276. {
  10277. xpath_query q(query, variables);
  10278. return q.evaluate_node(*this);
  10279. }
  10280. PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
  10281. {
  10282. return query.evaluate_node(*this);
  10283. }
  10284. }
  10285. #endif
  10286. #ifdef __BORLANDC__
  10287. # pragma option pop
  10288. #endif
  10289. // Intel C++ does not properly keep warning state for function templates,
  10290. // so popping warning state at the end of translation unit leads to warnings in the middle.
  10291. #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
  10292. # pragma warning(pop)
  10293. #endif
  10294. #if defined(_MSC_VER) && defined(__c2__)
  10295. # pragma clang diagnostic pop
  10296. #endif
  10297. // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
  10298. #undef PUGI__NO_INLINE
  10299. #undef PUGI__UNLIKELY
  10300. #undef PUGI__STATIC_ASSERT
  10301. #undef PUGI__DMC_VOLATILE
  10302. #undef PUGI__UNSIGNED_OVERFLOW
  10303. #undef PUGI__MSVC_CRT_VERSION
  10304. #undef PUGI__SNPRINTF
  10305. #undef PUGI__NS_BEGIN
  10306. #undef PUGI__NS_END
  10307. #undef PUGI__FN
  10308. #undef PUGI__FN_NO_INLINE
  10309. #undef PUGI__GETHEADER_IMPL
  10310. #undef PUGI__GETPAGE_IMPL
  10311. #undef PUGI__GETPAGE
  10312. #undef PUGI__NODETYPE
  10313. #undef PUGI__IS_CHARTYPE_IMPL
  10314. #undef PUGI__IS_CHARTYPE
  10315. #undef PUGI__IS_CHARTYPEX
  10316. #undef PUGI__ENDSWITH
  10317. #undef PUGI__SKIPWS
  10318. #undef PUGI__OPTSET
  10319. #undef PUGI__PUSHNODE
  10320. #undef PUGI__POPNODE
  10321. #undef PUGI__SCANFOR
  10322. #undef PUGI__SCANWHILE
  10323. #undef PUGI__SCANWHILE_UNROLL
  10324. #undef PUGI__ENDSEG
  10325. #undef PUGI__THROW_ERROR
  10326. #undef PUGI__CHECK_ERROR
  10327. #endif
  10328. /**
  10329. * Copyright (c) 2006-2022 Arseny Kapoulkine
  10330. *
  10331. * Permission is hereby granted, free of charge, to any person
  10332. * obtaining a copy of this software and associated documentation
  10333. * files (the "Software"), to deal in the Software without
  10334. * restriction, including without limitation the rights to use,
  10335. * copy, modify, merge, publish, distribute, sublicense, and/or sell
  10336. * copies of the Software, and to permit persons to whom the
  10337. * Software is furnished to do so, subject to the following
  10338. * conditions:
  10339. *
  10340. * The above copyright notice and this permission notice shall be
  10341. * included in all copies or substantial portions of the Software.
  10342. *
  10343. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  10344. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  10345. * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  10346. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  10347. * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  10348. * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  10349. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  10350. * OTHER DEALINGS IN THE SOFTWARE.
  10351. */