[CRIU] [PATCH 2/2] crit: Anonymize file paths in files.img
Harshavardhan Unnibhavi
hvubfoss at gmail.com
Sun Jun 30 10:53:49 MSK 2019
Path names in files.img are replaced by the SHA-1 hash values of their components.
Top-level names such as bin, var, usr, lib, etc. are kept unchanged.
Resolves issue #360.
Signed-off-by: Harshavardhan Unnibhavi <hvubfoss at gmail.com>
---
lib/py/anonymize.py | 72 +++++++++++++++++++++++++++++++++++++++++++++
lib/py/cli.py | 4 +++
2 files changed, 76 insertions(+)
create mode 100644 lib/py/anonymize.py
diff --git a/lib/py/anonymize.py b/lib/py/anonymize.py
new file mode 100644
index 00000000..42861696
--- /dev/null
+++ b/lib/py/anonymize.py
@@ -0,0 +1,72 @@
+# This file contains methods to anonymize criu images.
+
+# In order to anonymize images three steps are followed:
+# - decode the binary image to json
+# - strip the necessary information from the json dict
+# - encode the json dict back to a binary image, which is now anonymized
+
+# The following contents are being anonymized:
+# - Paths to files
+
+import hashlib
+
+def files_anon(image):
+    """Anonymize the regular-file paths of a decoded files.img dict.
+
+    For every entry in image['entries'] that carries a 'reg' record, the
+    record's 'name' path is rewritten component by component: the first
+    non-empty component (e.g. bin, var, usr, lib) is kept as is, and every
+    deeper component is replaced by the hex SHA-1 digest of that component
+    alone.  Empty components (leading, trailing or doubled '/') are left
+    untouched, so '/' stays '/'.
+
+    The image is modified in place and the same object is returned.
+    A 'reg' record without a 'name' key raises KeyError.
+    """
+    fname_key = 'reg'
+    # Component -> digest memo; identical names always map to the same
+    # digest, so shared directories stay recognisable as shared.
+    digests = {}
+
+    for entry in image['entries']:
+        if fname_key not in entry:
+            continue
+
+        parts = entry[fname_key]['name'].split('/')
+        depth = 0
+
+        for idx, part in enumerate(parts):
+            if part == '':
+                continue
+            # Decide "top level" by depth, not by split index, so that
+            # relative paths and paths with repeated slashes behave the
+            # same as plain absolute ones.
+            if depth > 0:
+                if part not in digests:
+                    # hashlib needs bytes; encode text components first.
+                    raw = part if isinstance(part, bytes) \
+                        else part.encode('utf-8')
+                    # A fresh hash object per component: reusing one
+                    # object would fold all earlier names into the digest.
+                    digests[part] = hashlib.sha1(raw).hexdigest()
+                parts[idx] = digests[part]
+            depth += 1
+
+        entry[fname_key]['name'] = '/'.join(parts)
+
+    return image
+
+# Dispatch table: image magic -> function that anonymizes that image type.
+anonymizers = {
+    'FILES': files_anon,
+}
+
+def anon_handler(image):
+    """Anonymize a decoded image using the handler registered for its magic.
+
+    The handler is looked up in the module-level `anonymizers` table by
+    image['magic'].  Returns whatever the handler returns, or -1 when no
+    handler is registered for that magic.  An image without a 'magic' key
+    raises KeyError.
+    """
+    # Use the table as the single source of truth for supported magics, so
+    # registering a new anonymizer does not require touching this function.
+    handler = anonymizers.get(image['magic'])
+    if handler is None:
+        return -1
+
+    return handler(image)
diff --git a/lib/py/cli.py b/lib/py/cli.py
index fdb24dbe..400c084e 100755
--- a/lib/py/cli.py
+++ b/lib/py/cli.py
@@ -6,6 +6,7 @@ import os
import glob
import pycriu
+from anonymize import anon_handler
def inf(opts):
if opts['in']:
@@ -286,6 +287,9 @@ def anonymize(opts):
try:
img = pycriu.images.load(inf(inf_opts))
+ anon_dict = anon_handler(img)
+ if anon_dict != -1:
+ pycriu.images.dump(anon_dict, outf(inf_opts))
except pycriu.images.MagicException as exc:
print("Unknown magic %#x.\n"\
"Found a raw image" %exc.magic, file=sys.stderr)
--
2.17.1
More information about the CRIU
mailing list