gp_parse: Add a generalize XML function to the top level parser
author Garming Sam <garming@catalyst.net.nz>
Mon, 28 May 2018 23:57:26 +0000 (11:57 +1200)
committer Andrew Bartlett <abartlet@samba.org>
Thu, 16 Aug 2018 21:42:22 +0000 (23:42 +0200)
In this function we take XML and, using the required metadata, rewrite
it into a generic form using entities. Unfortunately, ElementTree does
not allow us to store unescaped entities, so we must do a textual
replace on the output XML.

Signed-off-by: Garming Sam <garming@catalyst.net.nz>
Reviewed-by: Andrew Bartlett <abartlet@samba.org>
python/samba/gp_parse/__init__.py

index a8beb87e5918445bc08a8e99147bb6e851e0a6ac..3b31a451d615b994de6fc696af5143d4a9535ffb 100644 (file)
 
 from xml.dom import minidom
 from io import BytesIO
-from xml.etree.ElementTree import ElementTree
+from xml.etree.ElementTree import ElementTree, fromstring, tostring
+
+
+ENTITY_USER_ID = 0
+ENTITY_SDDL_ACL = 1
+ENTITY_NETWORK_PATH = 2
+
 
 class GPNoParserException(Exception):
     pass
 
+class GPGeneralizeException(Exception):
+    """Error raised while generalizing XML, e.g. for a bad entity type."""
+
+
+def entity_type_to_string(ent_type):
+    """Translate an ENTITY_* constant into its textual name.
+
+    Returns "USER_ID", "SDDL_ACL" or "NETWORK_PATH" for the matching
+    constant, and None when ent_type is none of the known entity types.
+    """
+    known_types = (
+        (ENTITY_USER_ID, "USER_ID"),
+        (ENTITY_SDDL_ACL, "SDDL_ACL"),
+        (ENTITY_NETWORK_PATH, "NETWORK_PATH"),
+    )
+
+    for candidate, name in known_types:
+        if ent_type == candidate:
+            return name
+
+    return None
+
+
 # [MS-GPIPSEC] (LDAP)
 # [MS-GPDPC] Deployed Printer Connections (LDAP)
 # [MS-GPPREF] Preferences Extension (XML)
@@ -55,3 +78,105 @@ class GPParser(object):
                  xml_declaration=True)
         minidom_parsed = minidom.parseString(temporary_bytes.getvalue())
         handle.write(minidom_parsed.toprettyxml(encoding=self.output_encoding))
+
+    def new_xml_entity(self, global_entities, ent_type):
+        """Build the next "&SAMBA__<TYPE>__<NNNN>__;" entity reference.
+
+        The four-digit (zero padded) number is the current size of
+        global_entities; this method does not add the new entity to it.
+
+        Raises GPGeneralizeException when ent_type is not a known type.
+        """
+        serial = "{:04d}".format(len(global_entities))
+
+        name = entity_type_to_string(ent_type)
+        if name is None:
+            raise GPGeneralizeException("No such entity type")
+
+        # For formatting reasons, pad every type name with underscores to
+        # the width of the NETWORK_PATH name so all entities line up.
+        width = len(entity_type_to_string(ENTITY_NETWORK_PATH))
+        padded_name = name.center(width, '_')
+
+        return "&SAMBA__{}__{}__;".format(padded_name, serial)
+
+    def generalize_xml(self, root, out_file, global_entities):
+        """Rewrite flagged values below root as entities and save the XML.
+
+        The text of every element flagged user_id="TRUE" or acl="TRUE" is
+        replaced by an "&SAMBA__...;" entity.  For elements flagged
+        network_path="TRUE" only the leading backslashes plus the server
+        name are replaced; the remainder of the path is kept as is.
+        Identical values share one entity through global_entities.
+
+        :param root: element whose descendants are rewritten in place
+        :param out_file: path of the file the resulting XML is written to
+        :param global_entities: dict mapping original text to its entity;
+                                new entities are added to it
+        :return: list of (entity, original text) tuples, one for each
+                 rewritten element, followed by the custom_entities() ones
+        """
+        entities = []
+
+        # Values that are replaced as a whole: user ids first, then ACLs.
+        for attribute, ent_type in (('user_id', ENTITY_USER_ID),
+                                    ('acl', ENTITY_SDDL_ACL)):
+            for elem in self._flagged_elements(root, attribute):
+                old_text = elem.text
+                if not old_text:
+                    continue
+
+                elem.text = self._entity_for(global_entities, old_text,
+                                             ent_type)
+                entities.append((elem.text, old_text))
+
+        for elem in self._flagged_elements(root, 'network_path'):
+            old_text = elem.text
+            if not old_text:
+                continue
+
+            # Only the leading backslashes and the first path component
+            # (the file server) become the entity.
+            stripped = old_text.lstrip('\\')
+            file_server = stripped.split('\\')[0]
+            split_at = old_text.find(file_server) + len(file_server)
+
+            server = old_text[:split_at]
+            remaining = old_text[split_at:]
+
+            to_put = self._entity_for(global_entities, server,
+                                      ENTITY_NETWORK_PATH)
+            elem.text = to_put + remaining
+            entities.append((to_put, server))
+
+        # Call any file specific customization of entities
+        # (which appear in any subclasses).
+        entities.extend(self.custom_entities(root, global_entities))
+
+        # tostring() yields bytes and escapes the '&' of each entity that
+        # was put into the tree, so the escaping is undone textually.  The
+        # replacement is done on bytes: mixing str arguments into
+        # bytes.replace() raises TypeError on Python 3.
+        output_xml = tostring(root)
+
+        for ent in entities:
+            token = ent[0]
+            if not isinstance(token, bytes):
+                token = token.encode('utf-8')
+            output_xml = output_xml.replace(token.replace(b'&', b'&amp;'),
+                                            token)
+
+        with open(out_file, 'wb') as f:
+            f.write(output_xml)
+
+        return entities
+
+    def _flagged_elements(self, root, attribute):
+        # Return the descendants of root carrying attribute="TRUE".
+        found = root.findall('.//*[@{}="TRUE"]'.format(attribute))
+        # Element objects define no ordering (a bare sort() raises
+        # TypeError on Python 3), so order by tag; the sort is stable, so
+        # elements sharing a tag stay in document order.
+        found.sort(key=lambda elem: elem.tag)
+        return found
+
+    def _entity_for(self, global_entities, text, ent_type):
+        # Return the entity recorded for text, creating one if needed.
+        if text not in global_entities:
+            global_entities[text] = self.new_xml_entity(global_entities,
+                                                        ent_type)
+        return global_entities[text]
+
+    def custom_entities(self, root, global_entities):
+        """Hook for special entity handling; override it in subclasses.
+
+        The base implementation contributes no entities.
+        """
+        return list()