Remove specific XML nodes using Clojure

后端 未结 3 610
悲&欢浪女
悲&欢浪女 2021-01-24 07:45

I have the following XML structure:

(def xmlstr
\"
  
    AAA         


        
3条回答
  •  隐瞒了意图╮
    2021-01-24 08:06

    The examples below use fully qualified namespaces rather than aliases. One way of solving this is to use zippers:

    (defn remove-types-loc
      "Walks the zipper `loc` depth-first and removes every branch node that has
      a direct :Type child whose first content item is a member of the set
      `types`. Returns the root node of the edited tree."
      [types loc]
      (letfn [(listed-type? [node]
                ;; A :Type element whose first content item is one of `types`.
                (and (= (:tag node) :Type)
                     (contains? types (first (:content node)))))
              (doomed? [cursor]
                ;; Only branch nodes have children to inspect.
                (and (clojure.zip/branch? cursor)
                     (some listed-type? (clojure.zip/children cursor))))]
        (loop [cursor loc]
          (cond
            (clojure.zip/end? cursor) (clojure.zip/root cursor)
            ;; clojure.zip/remove yields the loc that precedes the removed node
            ;; in depth-first order, so the walk simply resumes from there.
            (doomed? cursor)          (recur (clojure.zip/remove cursor))
            :else                     (recur (clojure.zip/next cursor))))))
    
    ;; Prune the Type B and Type C items from the zipper, then serialise the tree.
    (->> zipxml
         (remove-types-loc #{"B" "C"})
         (clojure.data.xml/emit-str))
    ;; => emits the expected result, with the two Type A Items
    

    The following gives the same result using core functions, but has quite a few nested levels and 'needs' two functions:

    (defn remove-types-in*
      "Filters the :content of the element `content`, dropping every child item
      that has a direct :Type child whose first content item is a member of the
      set `remove-types`. Returns the element with its :content replaced by the
      (lazy) filtered sequence.

      Text nodes (plain strings) are returned unchanged: they have no :content
      to filter, and calling update-in on a string would throw a
      ClassCastException."
      [remove-types content]
      (if (string? content)
        content
        (update-in content [:content]
                   (fn [items]
                     (remove (fn [item]
                               ;; (:content item) is nil for string items, so
                               ;; text children are always kept.
                               (some #(and (= (:tag %) :Type)
                                           (contains? remove-types (first (:content %))))
                                     (:content item)))
                             items)))))
    
    (defn remove-types-in
      "Applies remove-types-in* with the set `remove-types` to every child in the
      :content of `xmldoc` and returns `xmldoc` with the (lazy) mapped content."
      [remove-types xmldoc]
      (let [prune-child (fn [child] (remove-types-in* remove-types child))]
        (update-in xmldoc [:content]
                   (fn [children] (map prune-child children)))))
    
    ;; Prune the Type B and Type C items from the parsed document, then serialise it.
    (-> (remove-types-in #{"B" "C"} xmldoc)
        (clojure.data.xml/emit-str))
    ;; => same result as above
    

    Finally, when the structure is fixed and as simple as this one, it is easy to just construct the result manually. But this would break if the source gets more elements or attributes.

    ;; Rebuild the document by hand: read the Type and Note text of every Item,
    ;; skip the Items whose Type is B or C, and emit a fresh ROOT/Items tree.
    (let [excluded  #{"B" "C"}
          item-locs (clojure.data.zip.xml/xml-> zipxml :Items :Item)
          kept      (for [item-loc item-locs
                          :let [type-text (clojure.data.zip.xml/xml1-> item-loc :Type clojure.data.zip.xml/text)
                                note-text (clojure.data.zip.xml/xml1-> item-loc :Note clojure.data.zip.xml/text)]
                          :when (not (contains? excluded type-text))]
                      [:Item
                       [:Type type-text]
                       [:Note note-text]])]
      (clojure.data.xml/emit-str
       (clojure.data.xml/sexp-as-element
        [:ROOT
         [:Items kept]])))
    ;; same as above
    

    Possibly a better version of the above, which will work even if Item structure changes:

    ;; Keep every :Item node found by xml-seq unless one of its direct children
    ;; is a :Type element whose first content item is "B" or "C", then wrap the
    ;; survivors in fresh ROOT/Items elements and serialise.
    (let [unwanted       #{"B" "C"}
          unwanted-type? (fn [child]
                           (and (= (:tag child) :Type)
                                (contains? unwanted (first (:content child)))))
          keep-item?     (fn [node]
                           (and (= :Item (:tag node))
                                (not (some unwanted-type? (:content node)))))
          kept-items     (filter keep-item? (xml-seq xmldoc))]
      (clojure.data.xml/emit-str
       (clojure.data.xml/element
        :ROOT {}
        (clojure.data.xml/element :Items {} kept-items))))
    

    I didn't find any one-liners for doing this, and I'm not sure whether there are better or more readable ways of doing it using the org.clojure libraries or other libraries.

    For more complex XML editing, XSLT or XQuery Update is arguably a more 'native' solution. Here's a quick and dirty XSLT 2.0 solution using the open-source Saxon-HE S9API:

    ;; lein try net.sf.saxon/Saxon-HE "9.7.0-18"
    (defn remove-types-xslt
      "Runs an XSLT 2.0 transform (Saxon s9api) over the XML text `xmlstr` that
      copies the document but drops every Item element having a Type child
      equal to one of the strings in `remove-types`.

      Returns the java.io.StringWriter that received the serialized output;
      call `str` on it to obtain the XML text."
      [remove-types xmlstr]
      (let [;; Identity transform plus an empty template that swallows the
            ;; unwanted Items. The original stylesheet text was missing (an
            ;; empty string is not a well-formed stylesheet), so it is restored
            ;; here using the same predicate as the XQuery variant. The
            ;; parameter is typed xs:string* so an empty set is accepted too.
            stylesheet (str "<xsl:transform version='2.0'"
                            " xmlns:xsl='http://www.w3.org/1999/XSL/Transform'"
                            " xmlns:xs='http://www.w3.org/2001/XMLSchema'>"
                            "<xsl:param name='remove-types' as='xs:string*'/>"
                            "<xsl:template match='@*|node()'>"
                            "<xsl:copy><xsl:apply-templates select='@*|node()'/></xsl:copy>"
                            "</xsl:template>"
                            "<xsl:template match='Item[Type[. = $remove-types]]'/>"
                            "</xsl:transform>")
            processor (net.sf.saxon.s9api.Processor. false)
            compiler (.newXsltCompiler processor)
            exp (.compile compiler (javax.xml.transform.stream.StreamSource. (java.io.StringReader. stylesheet)))
            src (.build (.newDocumentBuilder processor) (javax.xml.transform.stream.StreamSource. (java.io.StringReader. xmlstr)))
            sw (java.io.StringWriter.)
            out (doto (net.sf.saxon.s9api.Serializer.) (.setOutputWriter sw))]
        ;; Run the transform for its side effect of writing into `sw`.
        (doto (.load exp)
          (.setInitialContextNode src)
          (.setDestination out)
          (.setParameter (net.sf.saxon.s9api.QName. "remove-types")
                         (net.sf.saxon.s9api.XdmValue.
                          (for [remove-type remove-types]
                            (net.sf.saxon.s9api.XdmAtomicValue. remove-type))))
          (.transform))
        sw))
    ;; Convert the returned writer into the resulting XML string.
    (-> (remove-types-xslt #{"B" "C"} xmlstr)
        (str))
    

    And for completeness, here's an even dirtier version using XQuery Update Facility. Note that this particular example uses Saxon-EE, and therefore requires a paid EE license.

    (defn remove-types-xq
      "Runs an XQuery Update `delete nodes` expression (Saxon s9api, updating
      enabled) against the XML text `xmlstr`, binding the strings in
      `remove-types` to the external variable $remove-types.

      Returns a java.io.StringWriter holding the first updated document, or nil
      when the evaluator reports no updated documents."
      [remove-types xmlstr]
      (let [saxon       (net.sf.saxon.s9api.Processor. true)
            xq-compiler (.newXQueryCompiler saxon)
            _           (.setUpdatingEnabled xq-compiler true)
            query       (.compile xq-compiler "declare variable $remove-types as xs:string+ external;delete nodes //Items/Item[Type[. = $remove-types]]")
            builder     (.newDocumentBuilder saxon)
            ;; The source tree is built with the LINKED_TREE model before the update runs.
            _           (.setTreeModel builder net.sf.saxon.om.TreeModel/LINKED_TREE)
            input       (.build builder (javax.xml.transform.stream.StreamSource. (java.io.StringReader. xmlstr)))
            evaluator   (.load query)]
        (.setContextItem evaluator input)
        (.setExternalVariable evaluator
                              (net.sf.saxon.s9api.QName. "remove-types")
                              (net.sf.saxon.s9api.XdmValue.
                               (for [type-name remove-types]
                                 (net.sf.saxon.s9api.XdmAtomicValue. type-name))))
        (.run evaluator)
        (when-let [updated (first (iterator-seq (.getUpdatedDocuments evaluator)))]
          (let [buffer     (java.io.StringWriter.)
                serializer (net.sf.saxon.s9api.Serializer.)]
            (.setOutputWriter serializer buffer)
            (.writeXdmValue saxon updated serializer)
            buffer))))
    ;; Convert the returned writer (or nil) into the resulting XML string.
    (-> (remove-types-xq #{"B" "C"} xmlstr)
        (str))
    

    Apart from all the surrounding Saxon setup code, the query itself, delete nodes //Items/Item[Type[. = $remove-types]], is pretty succinct.

提交回复
热议问题