How to parse an XSD to get the information from elements using C#?

前端 未结 3 1964
[愿得一人]
[愿得一人] 2021-01-02 03:13

I have an XSD with multiple complex types and simple types (part of the file shown below). I need to parse this document to get maxLength from each of the simpletypes that

相关标签:
3条回答
  • 2021-01-02 03:49

    My solution may not be exactly what you are looking for. You would probably prefer to use the System.Xml classes to handle this kind of information. I don't know how generic you'd like this parser to be; anyway, these are just my 2 cents. My code just uses regular expressions designed to correctly handle 99% of the possibilities (I guess). Some would call this shooting a fly with a gun. Anyway, here it is:

    using System.Text.RegularExpressions;
    using System.IO;
    
    /// <summary>
    /// Console host for the regex-based XSD parser.
    /// </summary>
    static class Program
    {
        /// <summary>
        /// Entry point: loads a hard-coded XSD file and prints the query result
        /// for the type named "Setup_Type".
        /// </summary>
        // The method must be spelled "Main" (capital M); C# does not treat a
        // lowercase "main" as the program entry point.
        static void Main()
        {
            XsdFile file = new XsdFile(@"c:\temp\test.xsd");
            Console.WriteLine(file.Query("Setup_Type"));
        }
    }
    
    /// <summary>
    /// Reads an XSD file and answers queries that list, for a named type, the
    /// path of each reachable simple-typed element together with its maxLength.
    /// </summary>
    public class XsdFile
    {

        // Parsed types keyed by type name, as returned by XsdType.CreateTypes.
        Dictionary<string, XsdType> types;

        /// <summary>Reads the whole file at <paramref name="path"/> and parses its types.</summary>
        /// <param name="path">Location of the XSD file on disk.</param>
        public XsdFile(string path)
        {
            types = XsdType.CreateTypes(File.ReadAllText(path));
        }

        /// <summary>Runs a query for <paramref name="typename"/> starting from an empty path.</summary>
        /// <param name="typename">Name of the type to look up.</param>
        /// <returns>The formatted result, or an empty string when the type is unknown.</returns>
        public string Query(string typename)
        {
            return Query(typename, "");
        }

        /// <summary>
        /// Resolves <paramref name="typename"/>; a simple type yields one
        /// "path = maxLength" entry, a complex type yields one line per element.
        /// </summary>
        /// <param name="typename">Name of the type to look up.</param>
        /// <param name="parent">Path accumulated so far, segments separated by "/".</param>
        private string Query(string typename, string parent)
        {
            XsdType resolved;
            if (!types.TryGetValue(typename, out resolved))
            {
                return "";
            }

            // Exact runtime-type comparison: only SimpleType and ComplexType themselves qualify.
            var runtimeType = resolved.GetType();

            if (runtimeType == typeof(SimpleType))
            {
                return string.Format("{0} = {1}", parent, ((SimpleType)resolved).maxLength);
            }

            if (runtimeType != typeof(ComplexType))
            {
                return "";
            }

            StringBuilder output = new StringBuilder();
            foreach (KeyValuePair<string, string> child in ((ComplexType)resolved).elements)
            {
                // child.Key is the element name, child.Value the name of its type.
                output.AppendLine(Query(child.Value, parent + "/" + child.Key));
            }
            return output.ToString();
        }
    }
    
    /// <summary>
    /// Base class for the named types extracted from an XSD document by the
    /// regex-based parser in <see cref="CreateTypes"/>.
    /// </summary>
    public abstract class XsdType
    {

        // Value of the type's "name" attribute; used as the dictionary key.
        string name;

        /// <summary>Creates a type with the given name.</summary>
        /// <param name="name">Value of the type's name attribute.</param>
        public XsdType(string name)
        {
            this.name = name;
        }

        /// <summary>
        /// Scans <paramref name="xsdBody"/> for named xsd:complexType and
        /// xsd:simpleType declarations and returns them keyed by name.
        /// </summary>
        /// <param name="xsdBody">Full text of the XSD document.</param>
        /// <returns>
        /// Complex types with their element name/type pairs, and simple types
        /// that declare a maxLength inside a restriction. Declarations without
        /// a name attribute, and simple types without maxLength, are skipped.
        /// </returns>
        public static Dictionary<string, XsdType> CreateTypes(string xsdBody)
        {

            Dictionary<string, XsdType> types = new Dictionary<string, XsdType>();

            // \k<kind> makes the closing tag match the same kind (complex/simple) as the opening tag.
            MatchCollection mc_types = Regex.Matches(xsdBody, @"<xsd:(?<kind>complex|simple)Type[\s\t]+(?<attributes>[^>]+)>(?<body>.+?)</xsd:\k<kind>Type>", RegexOptions.Singleline);
            foreach (Match m_type in mc_types)
            {
                string typeKind = m_type.Groups["kind"].Value;
                string typeAttributes = m_type.Groups["attributes"].Value;
                string typeBody = m_type.Groups["body"].Value;

                // \b keeps an attribute whose name merely ends in "name" from matching.
                Match m_nameattribute = Regex.Match(typeAttributes, @"\bname[\s\t]*=[\s\t]*""(?<name>[^""]+)""", RegexOptions.Singleline);
                if (!m_nameattribute.Success)
                {
                    continue;
                }

                string typeName = m_nameattribute.Groups["name"].Value;
                if (typeKind == "complex")
                {
                    ComplexType current_type = new ComplexType(typeName);

                    // [^>] confines the attribute capture to a single tag. The previous
                    // ".+?/>" could run from a non-self-closing <xsd:element ...> across
                    // its children to a later "/>", merging attributes of unrelated tags.
                    // "/?>" accepts both self-closing and opening element tags.
                    MatchCollection mc_elements = Regex.Matches(typeBody, @"<xsd:element\s(?<attributes>[^>]*?)/?>", RegexOptions.Singleline);
                    foreach (Match m_element in mc_elements)
                    {
                        Dictionary<string, string> elementAttributes = ParseAttributes(m_element.Groups["attributes"].Value);
                        string elementName;
                        string elementType;
                        if (!elementAttributes.TryGetValue("name", out elementName))
                            continue;
                        if (!elementAttributes.TryGetValue("type", out elementType))
                            continue;

                        // Indexer instead of Add: a repeated element name overwrites
                        // the earlier entry rather than throwing.
                        current_type.elements[elementName] = elementType;
                    }
                    types.Add(current_type.name, current_type);
                }
                else if (typeKind == "simple")
                {
                    Match m_maxLength = Regex.Match(typeBody, @"<xsd:restriction[^>]+>.+?<xsd:maxLength.+?value=""(?<maxLength>[^""]+)""", RegexOptions.Singleline);
                    if (m_maxLength.Success)
                    {
                        SimpleType current_type = new SimpleType(typeName);
                        current_type.maxLength = m_maxLength.Groups["maxLength"].Value;
                        types.Add(current_type.name, current_type);
                    }
                }
            }
            return types;
        }

        /// <summary>
        /// Splits the attribute portion of a tag into name/value pairs.
        /// </summary>
        /// <param name="value">Text between the tag name and the closing bracket.</param>
        /// <returns>Attribute values keyed by attribute name; only double-quoted, non-empty values are recognised.</returns>
        private static Dictionary<string, string> ParseAttributes(string value)
        {
            Dictionary<string, string> attributes = new Dictionary<string, string>();
            MatchCollection mc_attributes = Regex.Matches(value, @"(?<name>[^=\s\t]+)[\s\t]*=[\s\t]*""(?<value>[^""]+)""", RegexOptions.Singleline);
            foreach (Match m_attribute in mc_attributes)
            {
                attributes.Add(m_attribute.Groups["name"].Value, m_attribute.Groups["value"].Value);
            }
            return attributes;
        }

    }
    
    /// <summary>
    /// A named simple type together with the text of its maxLength value.
    /// </summary>
    public class SimpleType : XsdType
    {
        // Raw maxLength text; not set by the constructor, assigned by the code that builds the instance.
        public string maxLength;

        /// <summary>Creates a simple type with the given name.</summary>
        /// <param name="name">Name forwarded to the base class.</param>
        public SimpleType(string name) : base(name) { }
    }
    
    /// <summary>
    /// A named complex type together with the elements declared inside it.
    /// </summary>
    public class ComplexType : XsdType
    {
        // Maps element name -> name of the element's type.
        public Dictionary<string, string> elements = new Dictionary<string,string>();

        /// <summary>Creates a complex type with the given name and no elements.</summary>
        /// <param name="name">Name forwarded to the base class.</param>
        public ComplexType(string name) : base(name) { }
    }
    
    0 讨论(0)
  • 2021-01-02 04:06
    /// <summary>
    /// Simple tree of strings; printing it writes one slash-separated path per leaf.
    /// </summary>
    public class result_tree
    {
        // Text carried by this node.
        public string nodevalue = "";

        // Child nodes in insertion order.
        public List<result_tree> children = new List<result_tree>();

        /// <summary>Creates a node holding <paramref name="v"/>.</summary>
        public result_tree(string v)
        {
            nodevalue = v;
        }

        /// <summary>Number of direct children.</summary>
        public int ChildCount
        {
            get { return children.Count; }
        }

        /// <summary>True when the node has no children.</summary>
        public bool IsTerminal
        {
            get { return ChildCount == 0; }
        }

        /// <summary>
        /// Writes the path of every leaf below this node to the console.
        /// </summary>
        /// <param name="skip">When true, this node's own value is left out of the paths passed to its children.</param>
        /// <param name="prefix">Path accumulated from the ancestors.</param>
        private void print_children(bool skip, string prefix)
        {
            // A separator is only needed once the prefix already holds a segment.
            string separator = prefix.Length == 0 ? "" : "/";

            if (IsTerminal)
            {
                Console.WriteLine(prefix + separator + nodevalue);
                return;
            }

            string ownSegment = skip ? "" : nodevalue;
            string childPrefix = prefix + separator + ownSegment;
            foreach (result_tree child in children)
            {
                child.print_children(false, childPrefix);
            }
        }

        /// <summary>
        /// Prints all leaf paths, omitting this (root) node's value from them.
        /// </summary>
        public void print_children()
        {
            print_children(true, "");
        }
    }
    
    /// <summary>
    /// Console program that loads an XSD with System.Xml.Schema, asks for a type
    /// name and prints the maxLength of each simple-typed element below it.
    /// </summary>
    static class Program
    {
        /// <summary>Writes schema read/validation messages to the console.</summary>
        private static void ValidationCallBack(object sender, ValidationEventArgs args)
        {
            Console.WriteLine(args.Message);
        }

        // Tree built by the most recent query in Main.
        public static result_tree results;

        /// <summary>
        /// Returns the value of the first maxLength facet of a simple type.
        /// </summary>
        /// <param name="simp">Simple type to inspect.</param>
        /// <returns>The facet value, or "" when the type is not a restriction or has no maxLength facet.</returns>
        static string deref_simple(XmlSchemaSimpleType simp)
        {
            // Content is not necessarily a restriction, so a hard cast could throw.
            XmlSchemaSimpleTypeRestriction xsstr = simp.Content as XmlSchemaSimpleTypeRestriction;
            if (xsstr == null)
            {
                return "";
            }

            foreach (XmlSchemaObject o in xsstr.Facets)
            {
                XmlSchemaMaxLengthFacet fac = o as XmlSchemaMaxLengthFacet;
                if (fac != null)
                {
                    return fac.Value;
                }
            }
            return "";
        }

        /// <summary>
        /// Builds a result tree for a complex type by following the type of each
        /// element in its sequence.
        /// </summary>
        /// <param name="xs">Schema whose SchemaTypes table is used to resolve element types.</param>
        /// <param name="cplx">Complex type to expand.</param>
        /// <returns>
        /// A node named after the type, with one child per element whose type
        /// resolves to a complex or simple type in <paramref name="xs"/>.
        /// </returns>
        static result_tree deref_complex(XmlSchema xs, XmlSchemaComplexType cplx)
        {
            result_tree rt = new result_tree(cplx.Name);

            // Particle may be null or something other than a sequence; nothing to expand then.
            XmlSchemaSequence seq = cplx.Particle as XmlSchemaSequence;
            if (seq == null)
            {
                return rt;
            }

            foreach (XmlSchemaObject o in seq.Items)
            {
                XmlSchemaElement elem = o as XmlSchemaElement;
                if (elem == null)
                {
                    continue;
                }

                // The lookup yields null when the element's type is not declared in
                // this schema (e.g. a built-in type); both casts below then fail and
                // the element is skipped instead of causing a NullReferenceException.
                XmlSchemaObject referto = xs.SchemaTypes[elem.SchemaTypeName];

                XmlSchemaComplexType nestedComplex = referto as XmlSchemaComplexType;
                if (nestedComplex != null)
                {
                    result_tree branch = deref_complex(xs, nestedComplex);
                    // Label the subtree with the element name rather than the type name.
                    branch.nodevalue = elem.Name;
                    rt.children.Add(branch);
                    continue;
                }

                XmlSchemaSimpleType nestedSimple = referto as XmlSchemaSimpleType;
                if (nestedSimple != null)
                {
                    rt.children.Add(new result_tree(elem.Name + " = " + deref_simple(nestedSimple)));
                }
            }

            return rt;
        }

        /// <summary>
        /// The main entry point for the application.
        /// </summary>
        [STAThread]
        static void Main()
        {
            XmlSchema xs;
            // Dispose the reader as soon as the schema has been read.
            using (StreamReader sr = new StreamReader("aschema.xml"))
            {
                xs = XmlSchema.Read(sr, ValidationCallBack);
            }

            XmlSchemaSet xss = new XmlSchemaSet();
            xss.Add(xs);
            xss.Compile();

            Console.WriteLine("Query: ");
            string q = Console.ReadLine();

            XmlQualifiedName xqn = new XmlQualifiedName(q);

            if (xs.SchemaTypes.Contains(xqn))
            {
                // Only complex types are expanded; other kinds print nothing.
                XmlSchemaComplexType complex = xs.SchemaTypes[xqn] as XmlSchemaComplexType;
                if (complex != null)
                {
                    results = deref_complex(xs, complex);
                    results.print_children();
                }
            }
            else
            {
                Console.WriteLine("Not found!");
            }
        }
    }
    
    0 讨论(0)
  • 2021-01-02 04:12

    I have seen similar questions asked in the past (full disclosure, I've asked a similar question myself). Parsing an XSD is not for the faint of heart.

    You basically have 2 options: the first is easier to implement, but can be broken more easily by minor changes to the XSD. The second is more robust but harder to implement.

    Option 1:

    Parsing the XSD with LINQ (or other C# XML parser if you prefer). Since an XSD is just an XML, you can load it into an XDocument and just read it via LINQ.

    For just a sample of your own XSD:

    <xsd:simpleType name="Amount_Type">
      <xsd:annotation>
        <xsd:documentation>Amount</xsd:documentation>
      </xsd:annotation>
      <xsd:restriction base="xsd:string">
        <xsd:maxLength value="12" />
      </xsd:restriction>
    </xsd:simpleType>
    

    You can access the MaxLength:

    var xDoc = XDocument.Load("your XSD path");
    var ns = XNamespace.Get(@"http://www.w3.org/2001/XMLSchema");
    
    // Null-safe lookup: the (string) casts yield null for a missing attribute, and
    // iterating Elements(maxLength) skips restrictions that declare no maxLength,
    // so length is null (rather than an exception being thrown) when nothing matches.
    var length = (from sType in xDoc.Element(ns + "schema").Elements(ns + "simpleType")
                  where (string)sType.Attribute("name") == "Amount_Type"
                  from r in sType.Elements(ns + "restriction")
                  from m in r.Elements(ns + "maxLength")
                  select (string)m.Attribute("value")).FirstOrDefault();
    

    This does not offer a very easy method for parsing by type name, especially for extended types. To use this you need to know the exact path for each element you are looking for.

    Option 2:

    This is far too complex for a quick answer (note: see the edit below - I had some time and put together a working solution), so I am going to encourage you to look at my own question I linked above. In it, I linked a great blog that shows how to seriously break down the XSD into pieces and might allow you to perform the type of search you want. You have to decide if it is worth the effort to develop it (the blog shows an implementation with XmlReader that contains an XML that is validated against the XSD in question, but you can easily accomplish this by directly loading the XSD and parsing it).

    The 2 key ideas to find in the blog are:

    // in the getRestriction method (reader in this context is an `XmlReader` that 
    //  contains a XML that is being validated against the specific XSD
    if (reader.SchemaInfo.SchemaElement == null) return null;
    simpleType = reader.SchemaInfo.SchemaElement.ElementSchemaType as XmlSchemaSimpleType;
    if (simpleType == null) return null;
    restriction = simpleType.Content as XmlSchemaSimpleTypeRestriction;
    
    // then in the getMaxLength method
    if (restriction == null) return null;
    List<int> result = new List<int>();
    foreach (XmlSchemaObject facet in restriction.Facets) {
    if (facet is XmlSchemaMaxLengthFacet) result.Add(int.Parse(((XmlSchemaFacet) facet).Value));
    

    I actually tried the same thing last year to parse an XSD as part of a complicated data validation method. It took me the better part of a week to really understand what was happening and to adapt the methods in the blog to suit my purposes. It is definitely the best way to implement exactly what you want.

    If you want to try this with a standalone schema, you can load the XSD into an XmlSchemaSet object, then use the GlobalTypes property to help you find the specific type you are looking for.


    EDIT: I pulled up my old code and started putting together the code to help you.

    First to load your schema:

    XmlSchemaSet set; // this needs to be accessible to the methods below,
                      //  so should be a class level field or property
    
    // Read the XSD from disk and compile it into the schema set; the stream is
    // disposed when the using block ends.
    using (var fs = new FileStream(@"your path here", FileMode.Open))
    {
        // null: no validation event handler is supplied for the read
        var schema = XmlSchema.Read(fs, null);
    
        set = new XmlSchemaSet();
        set.Add(schema);
        set.Compile();
    }
    

    The following methods should give you close to what you want based on the XSD you provided. It should be pretty adaptable to deal with more complex structures.

    /// <summary>
    /// Looks up a global type by name in the schema set and collects the
    /// max lengths found beneath it.
    /// </summary>
    /// <param name="xsdElementName">Name of the global type to look up.</param>
    /// <returns>Dictionary of path to max length, as produced by GetAllMaxLength.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="xsdElementName"/> is null.</exception>
    public Dictionary<string, int> GetElementMaxLength(String xsdElementName)
    {
        // ArgumentNullException derives from ArgumentException, so existing
        // catch blocks for ArgumentException still apply.
        if (xsdElementName == null) throw new ArgumentNullException("xsdElementName");

        // if your XSD has a target namespace, you need to replace null with the namespace name
        var qname = new XmlQualifiedName(xsdElementName, null);
    
        // find the type you want in the XmlSchemaSet
        var parentType = set.GlobalTypes[qname];
    
        // walk the type and gather every max length below it
        return GetAllMaxLength(parentType);
    }
    
    /// <summary>
    /// Recursively collects max lengths from a schema object.
    /// </summary>
    /// <param name="obj">
    /// A simple type, complex type, particle group (sequence/choice/all) or
    /// element; any other object, including null, yields an empty dictionary.
    /// </param>
    /// <returns>
    /// For a simple type: its name mapped to GetMaxLength. For an element: the
    /// entries of its type, each key prefixed with "elementName/". For complex
    /// types and groups: the merged entries of their members.
    /// </returns>
    private Dictionary<string, int> GetAllMaxLength(XmlSchemaObject obj)
    {
        Dictionary<string, int> dict = new Dictionary<string, int>();

        var simpleType = obj as XmlSchemaSimpleType;
        if (simpleType != null)
        {
            // a simple type contributes its own MaxLength restriction
            dict[simpleType.QualifiedName.Name] = GetMaxLength(simpleType);
            return dict;
        }

        var complexType = obj as XmlSchemaComplexType;
        if (complexType != null)
        {
            // Delegate to the content particle. If it is not a particle group,
            // the recursive call returns an empty dictionary instead of the
            // NullReferenceException a failed cast to XmlSchemaSequence caused.
            foreach (var kvp in GetAllMaxLength(complexType.ContentTypeParticle))
            {
                dict[kvp.Key] = kvp.Value;
            }
            return dict;
        }

        // XmlSchemaGroupBase is the common base of xs:sequence, xs:choice and
        // xs:all, so nested groups are walked as well.
        var group = obj as XmlSchemaGroupBase;
        if (group != null)
        {
            foreach (XmlSchemaObject item in group.Items)
            {
                // each item is parsed by this same method and merged in
                foreach (var kvp in GetAllMaxLength(item))
                {
                    dict[kvp.Key] = kvp.Value;
                }
            }
            return dict;
        }

        var ele = obj as XmlSchemaElement;
        if (ele != null)
        {
            // if obj is an XmlSchemaElement, then you need to find the type
            //  based on the SchemaTypeName property.  This is why your
            //  XmlSchemaSet needs to have class-level scope
            var type = set.GlobalTypes[ele.SchemaTypeName];

            // prefix every key of the type's result with this element's name
            foreach (var kvp in GetAllMaxLength(type))
            {
                dict[String.Format("{0}/{1}", ele.QualifiedName.Name, kvp.Key)] = kvp.Value;
            }
        }

        return dict;
    }
    
    /// <summary>
    /// Reads the maxLength facet of a simple type.
    /// </summary>
    /// <param name="xsdSimpleType">Simple type whose content is inspected.</param>
    /// <returns>
    /// The parsed value of the first maxLength facet, or -1 when the content is
    /// not a restriction or the restriction carries no maxLength facet.
    /// </returns>
    private Int32 GetMaxLength(XmlSchemaSimpleType xsdSimpleType)
    {
        var restriction = xsdSimpleType.Content as XmlSchemaSimpleTypeRestriction;

        if (restriction != null)
        {
            foreach (XmlSchemaObject candidate in restriction.Facets)
            {
                var maxLengthFacet = candidate as XmlSchemaMaxLengthFacet;
                if (maxLengthFacet != null)
                {
                    // the first maxLength facet decides the result
                    return int.Parse(maxLengthFacet.Value);
                }
            }
        }

        // -1 marks "no maxLength found"
        return -1;
    }
    

    Then the usage is pretty simple, you just need to call the GetElementMaxLength(String) method and it will return a dictionary of the names in the format you provided with the value as the max length:

    // Collect the max lengths below Setup_Type and print one "key | value" line per entry.
    Dictionary<string, int> maxLengths = GetElementMaxLength("Setup_Type");
    
    foreach (KeyValuePair<string, int> entry in maxLengths)
    {
        Console.WriteLine("{0} | {1}", entry.Key, entry.Value);
    }
    
    0 讨论(0)
提交回复
热议问题