[CRIU] [PATCH 3/3] Issue #360: Anonymize image files

Harshavardhan Unnibhavi hvubfoss at gmail.com
Sat Jun 22 12:37:32 MSK 2019


This commit adds the file anonymizer function, which anonymizes the file names present in the image files.

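Each anonymized file name is built by shuffling the characters of every component along the path from the root; a component seen at the same depth is always replaced by the same shuffled name.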

Signed-off-by: Harshavardhan Unnibhavi <hvubfoss at gmail.com>
---
 lib/py/cli.py   |  9 ++++++-
 lib/py/strip.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 lib/py/strip.py

diff --git a/lib/py/cli.py b/lib/py/cli.py
index 17622fd2..4a8efeff 100755
--- a/lib/py/cli.py
+++ b/lib/py/cli.py
@@ -5,6 +5,7 @@ import json
 import os
 
 import pycriu
+import strip
 
 def inf(opts):
 	if opts['in']:
@@ -281,15 +282,21 @@ def anonymize(opts):
 	img_files = os.listdir(opts['in'])
 
 	for i in img_files:
-		temp = {'in':os.path.join(opts['in'], i)}
+		temp = {'in':os.path.join(opts['in'], i), 'out':os.path.join(opts['out'], i)}
 
 		try:
 			m, img = pycriu.images.load(inf(temp), anon_info = True)
+			print("Processing File name:{} with magic:{}".format(i, m))
 		except pycriu.images.MagicException as exc:
 			print("Unknown magic %#x.\n"\
 					"Found a raw image, continuing ..."% exc.magic, file=sys.stderr)
 			continue
 		
+		anon_dict = strip.anon_handler(img, m)
+		if anon_dict != -1:
+			pycriu.images.dump(anon_dict, outf(temp))
+		
+		
 
 explorers = { 'ps': explore_ps, 'fds': explore_fds, 'mems': explore_mems, 'rss': explore_rss }
 
diff --git a/lib/py/strip.py b/lib/py/strip.py
new file mode 100644
index 00000000..4069275c
--- /dev/null
+++ b/lib/py/strip.py
@@ -0,0 +1,66 @@
+# This file contains methods to deal with anonymising images.
+#
+# The list of contents to be anonymised can be found at: https://github.com/checkpoint-restore/criu/issues/360
+#
+# In order to anonymise the image files, three steps are followed:
+#    - decode the binary image to json
+#    - strip the necessary information from the json dict
+#    - encode the json dict back to a binary image, which is now anonymised
+
+import sys
+import json
+import random
+
+def files_anon(image):
+    levels = {}
+
+    for e in image['entries']:
+        f_path = e['reg']['name']
+        f_path = f_path.split('/')
+
+        lev_num = 0
+        for p in f_path:
+            if p == '':
+                continue
+            if lev_num in levels.keys():
+                if p not in levels[lev_num].keys():
+                    temp = list(p)
+                    random.shuffle(temp)
+                    levels[lev_num][p] = ''.join(temp)
+            else:
+                levels[lev_num] = {}
+                temp = list(p)
+                random.shuffle(temp)
+                levels[lev_num][p] = ''.join(temp)
+            lev_num += 1
+        
+    for i, e in enumerate(image['entries']):
+        f_path = e['reg']['name']
+        if f_path == '/':
+            continue
+        f_path = f_path.split('/')
+
+        lev_num = 0
+        for j, p in enumerate(f_path):
+            if p == '':
+                continue
+            f_path[j] = levels[lev_num][p]
+            lev_num += 1
+        f_path = '/'.join(f_path)
+        image['entries'][i]['reg']['name'] = f_path
+    
+    return image
+        
+
+
+
+anonymizers = {
+    'FILES': files_anon,
+    }
+
+def anon_handler(image, magic):
+    if magic != 'FILES':
+        return -1
+    handler = anonymizers[magic]
+    anon_image = handler(image)
+    return anon_image
\ No newline at end of file
-- 
2.17.1



More information about the CRIU mailing list