[CRIU] [PATCH 2/2] crit: Anonymize file paths in files.img
Pavel Emelianov
xemul at virtuozzo.com
Tue Jul 9 15:57:50 MSK 2019
On 6/30/19 10:53 AM, Harshavardhan Unnibhavi wrote:
> File path names are replaced by their corresponding sha1 hash values.
> The top level names such as bin, var, usr, lib etc, are kept unchanged.
>
> Resolve Issue #360.
This looks really good :)
Let's go ahead and try to teach criu-restore to restore the anonymized
images up to the following line in restore_task() in criu/pie/restorer.c:
restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS);
after which the whole restore just aborts and exits.
> Signed-off-by: Harshavardhan Unnibhavi <hvubfoss at gmail.com>
> ---
> lib/py/anonymize.py | 72 +++++++++++++++++++++++++++++++++++++++++++++
> lib/py/cli.py | 4 +++
> 2 files changed, 76 insertions(+)
> create mode 100644 lib/py/anonymize.py
>
> diff --git a/lib/py/anonymize.py b/lib/py/anonymize.py
> new file mode 100644
> index 00000000..42861696
> --- /dev/null
> +++ b/lib/py/anonymize.py
> @@ -0,0 +1,72 @@
> +# This file contains methods to anonymize criu images.
> +
> +# In order to anonymize images three steps are followed:
> +# - decode the binary image to json
> +# - strip the necessary information from the json dict
> +# - encode the json dict back to a binary image, which is now anonymized
> +
> +# The following contents are being anonymized:
> +# - Paths to files
> +
> +import hashlib
> +
def _anon_component(name):
    """Return the hex SHA-1 digest of a single path component.

    A fresh hash object is used for every call so the digest depends only
    on *name*, never on components that were hashed earlier.
    """
    # hashlib only accepts bytes; text is encoded so this works on
    # Python 3 as well as Python 2 (where str is already bytes).
    if not isinstance(name, bytes):
        name = name.encode('utf-8')
    return hashlib.sha1(name).hexdigest()


def _anon_path(path):
    """Return *path* with its components replaced by their SHA-1 digests.

    The first component of an absolute path (bin, var, usr, lib, ...) is
    kept unchanged. Empty components produced by leading, trailing or
    repeated slashes are preserved, so '/' stays '/'.
    """
    parts = path.split('/')
    keep_top_level = path.startswith('/')
    level = 0

    for idx, part in enumerate(parts):
        if not part:
            continue
        if not (keep_top_level and level == 0):
            parts[idx] = _anon_component(part)
        level += 1

    return '/'.join(parts)


def files_anon(image):
    """Anonymize the file paths stored in a decoded files.img image.

    Every entry of image['entries'] that has a 'reg' key gets its
    ['reg']['name'] path rewritten in place: each path component is
    replaced by the SHA-1 hex digest of that component, except for the
    top-level directory name of an absolute path. Entries without a
    'reg' key are left untouched.

    The same component always maps to the same digest, regardless of the
    order of the entries or the depth at which it appears.

    Returns the same (mutated) image dict.
    """
    fname_key = 'reg'

    for entry in image['entries']:
        if fname_key in entry:
            reg = entry[fname_key]
            reg['name'] = _anon_path(reg['name'])

    return image
> +
# Dispatch table: maps an image's 'magic' string to the function that
# anonymizes images of that type. Register new anonymizers here only;
# anon_handler() consults this table to decide what is supported.
anonymizers = {
    'FILES': files_anon,
}


def anon_handler(image):
    """Anonymize a decoded image with the handler registered for its magic.

    image is a decoded image dict that carries its type under 'magic'.

    Returns the dict produced by the matching anonymizer, or -1 when no
    anonymizer is registered for the image's magic.
    """
    handler = anonymizers.get(image['magic'])

    # Callers compare the result against -1 to detect unsupported images,
    # so the sentinel is kept rather than raising.
    if handler is None:
        return -1

    return handler(image)
> diff --git a/lib/py/cli.py b/lib/py/cli.py
> index fdb24dbe..400c084e 100755
> --- a/lib/py/cli.py
> +++ b/lib/py/cli.py
> @@ -6,6 +6,7 @@ import os
> import glob
>
> import pycriu
> +from anonymize import anon_handler
>
> def inf(opts):
> if opts['in']:
> @@ -286,6 +287,9 @@ def anonymize(opts):
>
> try:
> img = pycriu.images.load(inf(inf_opts))
> + anon_dict = anon_handler(img)
> + if anon_dict != -1:
> + pycriu.images.dump(anon_dict, outf(inf_opts))
> except pycriu.images.MagicException as exc:
> print("Unknown magic %#x.\n"\
> "Found a raw image" %exc.magic, file=sys.stderr)
>
More information about the CRIU
mailing list