[CRIU] [PATCH 2/2] crit: Anonymize file paths in files.img

Harshavardhan Unnibhavi hvubfoss at gmail.com
Sun Jun 30 10:53:49 MSK 2019


File path names are replaced by their corresponding sha1 hash values.
The top-level names, such as bin, var, usr and lib, are kept unchanged.

Resolve Issue #360.

Signed-off-by: Harshavardhan Unnibhavi <hvubfoss at gmail.com>
---
 lib/py/anonymize.py | 72 +++++++++++++++++++++++++++++++++++++++++++++
 lib/py/cli.py       |  4 +++
 2 files changed, 76 insertions(+)
 create mode 100644 lib/py/anonymize.py

diff --git a/lib/py/anonymize.py b/lib/py/anonymize.py
new file mode 100644
index 00000000..42861696
--- /dev/null
+++ b/lib/py/anonymize.py
@@ -0,0 +1,72 @@
+# This file contains methods to anonymize criu images.
+
+# In order to anonymize images three steps are followed:
+#     - decode the binary image to json
+#     - strip the necessary information from the json dict
+#     - encode the json dict back to a binary image, which is now anonymized
+
+# The following contents are being anonymized:
+#     - Paths to files
+
+import hashlib
+
+def files_anon(image):
+    """Anonymize the file paths stored in a decoded files image.
+
+    Each path component is replaced by the sha1 hex digest of that
+    component alone, so a given name always maps to the same digest.
+    The first component of an absolute path (bin, usr, var, ...) is
+    kept unchanged, and so is the number and position of slashes.
+    Entries without a 'reg' record are not touched.
+
+    Args:
+        image: decoded image dict with an 'entries' list.
+
+    Returns:
+        The same dict, with every 'reg' name rewritten in place.
+    """
+    fname_key = 'reg'
+
+    for e in image['entries']:
+        # Only entries with a 'reg' record carry a path to rewrite;
+        # anything else is passed through unchanged.
+        if fname_key not in e:
+            continue
+
+        parts = e[fname_key]['name'].split('/')
+        # split() yields a leading '' only for paths that start at '/'.
+        absolute = parts[0] == ''
+
+        for i, p in enumerate(parts):
+            # Empty components come from leading, trailing or doubled
+            # slashes; keeping them preserves the path's shape.
+            if p == '':
+                continue
+            # Top-level directory names are kept readable.
+            if absolute and i == 1:
+                continue
+
+            # Hash every name with a fresh sha1 object: one shared
+            # running digest would make the result depend on the
+            # order in which names were seen.  sha1 needs bytes.
+            raw = p if isinstance(p, bytes) else p.encode('utf-8')
+            parts[i] = hashlib.sha1(raw).hexdigest()
+
+        e[fname_key]['name'] = '/'.join(parts)
+
+    return image
+
+# Maps an image's magic to the function that anonymizes that image type.
+# anon_handler() looks handlers up here.
+anonymizers = {'FILES': files_anon}
+
+def anon_handler(image):
+    """Anonymize a decoded image; return -1 if its magic is unsupported."""
+    # Dispatch through the table so that adding an entry there is
+    # enough to support another image type.
+    handler = anonymizers.get(image['magic'])
+    if handler is None:
+        return -1
+
+    # The handler returns the anonymized image dict.
+    return handler(image)
diff --git a/lib/py/cli.py b/lib/py/cli.py
index fdb24dbe..400c084e 100755
--- a/lib/py/cli.py
+++ b/lib/py/cli.py
@@ -6,6 +6,7 @@ import os
 import glob
 
 import pycriu
+from anonymize import anon_handler
 
 def inf(opts):
 	if opts['in']:
@@ -286,6 +287,9 @@ def anonymize(opts):
 
 		try:
 			img = pycriu.images.load(inf(inf_opts))
+			anon_dict = anon_handler(img)
+			if anon_dict != -1:
+				pycriu.images.dump(anon_dict, outf(inf_opts))
 		except pycriu.images.MagicException as exc:
 			print("Unknown magic %#x.\n"\
 					"Found a raw image" %exc.magic, file=sys.stderr)
-- 
2.17.1



More information about the CRIU mailing list