Previous Topic: Schema Notes | Next Topic: Universal Extractor


XML Schema for BTE Configuration File

The Binary Text Extractor configuration file, BinaryTextorConfig.xml, uses the following schema:

<?xml version="1.0" encoding="utf-8" ?> 
<xs:schema attributeFormDefault="unqualified" elementFormDefault="qualified" 
 xmlns:xs="http://www.w3.org/2001/XMLSchema">
<!--  FileType Element Type   --> 
<xs:complexType name="FileTypeType">
  <!--  A FileType holds one or more MagicNumber elements followed by one or more
   Encoding elements. The order is fixed by the sequence: every MagicNumber
   must appear before the first Encoding  -->
  <xs:sequence>
    <!--  The magic number can be a hex string or a text string (see MagicNumberType)  -->
    <xs:element name="MagicNumber" type="MagicNumberType" minOccurs="1" maxOccurs="unbounded"/>
    <xs:element name="Encoding" type="EncodingType" minOccurs="1" maxOccurs="unbounded"/>
  </xs:sequence>

  <!--  Name of the file type. No 'use' is declared, so the attribute is optional  -->
  <xs:attribute name="name" type="xs:string"/>
</xs:complexType>
  <!--  Top Level Element  --> 
  <xs:element name="UniversalBinaryTextor">
    <!--  The only global element declared in this schema. It contains zero or
     more FileType children, so an empty UniversalBinaryTextor is valid  -->
    <xs:complexType>
      <xs:sequence>
        <xs:element name="FileType" type="FileTypeType" minOccurs="0" maxOccurs="unbounded"/>
      </xs:sequence>
    </xs:complexType>
  </xs:element>

  <!--  CharSet Element Type  --> 
  <xs:complexType name="CharSetType">
    <!--  A CharSet is an empty element described only by attributes: either an
     explicit range ('start' and 'end') or a Unicode block ('blockName').
     All three attributes are optional as far as the schema is concerned  -->

    <!--  If both 'start' and 'end' are populated, these are used instead of 'blockName'  -->
    <!--  'start' and 'end' are in hex, prefixed with 0x, e.g. 0xF007.
     Both are typed xs:string, so this format is not enforced by the schema  -->
    <xs:attribute name="start" type="xs:string" use="optional"/>
    <xs:attribute name="end" type="xs:string" use="optional"/>

    <!--  'blockName' can be any Block Name from http://www.unicode.org/Public/UNIDATA/Blocks.txt
     Case, spaces, hyphens and underbars are ignored when comparing block names  -->
    <xs:attribute name="blockName" type="xs:string" use="optional"/>
  </xs:complexType>

  <!--  Supported Encodings  --> 
  <xs:simpleType name="EncodingEnumType">
    <!--  Closed list of values accepted by the 'name' attribute of EncodingType.
     The base type is xs:string, so values must match exactly (case-sensitive)  -->
    <xs:restriction base="xs:string">
      <xs:enumeration value="ASCII"/>
      <xs:enumeration value="UTF8"/>
      <xs:enumeration value="UTF16_LITTLEENDIAN"/>
      <xs:enumeration value="UTF16_BIGENDIAN"/>
    </xs:restriction>
  </xs:simpleType>

  <!--  Encoding Element Type  --> 
  <xs:complexType name="EncodingType">
    <!--  An Encoding contains one or more CharSet children and carries two
     required attributes  -->
    <xs:sequence>
      <xs:element name="CharSet" type="CharSetType" minOccurs="1" maxOccurs="unbounded"/>
    </xs:sequence>
    <!--  'name' must be one of the EncodingEnumType values  -->
    <xs:attribute name="name" type="EncodingEnumType" use="required"/>
    <!--  'minLength' is an xs:unsignedByte, i.e. an integer from 0 to 255.
     NOTE(review): presumably the minimum length of an extracted string; confirm  -->
    <xs:attribute name="minLength" type="xs:unsignedByte" use="required"/>
  </xs:complexType>
  <xs:complexType name="MagicNumberType">
    <!--  A MagicNumber is an empty element described only by attributes.
     None of them declared a 'use' originally, so all three are optional;
     the default is spelled out here  -->
    <!--  'value' holds the magic number itself; 'type' says whether it is
     written as an ascii-string or a hex-string (see MagicNumberTypeEnum)  -->
    <xs:attribute name="value" type="xs:string" use="optional"/>
    <xs:attribute name="type" type="MagicNumberTypeEnum" use="optional"/>
    <!--  NOTE(review): 'offSet' is presumably the position in the file at which
     the magic number is expected; xs:integer also admits negative values; confirm  -->
    <xs:attribute name="offSet" type="xs:integer" use="optional"/>
  </xs:complexType>
  <xs:simpleType name="MagicNumberTypeEnum">
    <!--  Closed list of values accepted by the 'type' attribute of MagicNumberType.
     The base type is xs:string, so values must match exactly (case-sensitive)  -->
    <xs:restriction base="xs:string">
      <xs:enumeration value="ascii-string"/>
      <xs:enumeration value="hex-string"/>
    </xs:restriction>
  </xs:simpleType>
</xs:schema>