[CRIU] [PATCH 2/2] crit: Anonymize file paths in files.img

Pavel Emelianov xemul at virtuozzo.com
Tue Jul 9 15:57:50 MSK 2019


On 6/30/19 10:53 AM, Harshavardhan Unnibhavi wrote:
> File path names are replaced by their corresponding SHA-1 hash values.
> The top-level names such as bin, var, usr, lib, etc. are kept unchanged.
> 
> Resolve Issue #360.

This looks really good :)

Let's go ahead and try to teach criu-restore to restore the anonymized
images up to the following line in restore_task() in criu/pie/restorer.c:

	restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS);

after which the whole restore just aborts and exits.

> Signed-off-by: Harshavardhan Unnibhavi <hvubfoss at gmail.com>
> ---
>  lib/py/anonymize.py | 72 +++++++++++++++++++++++++++++++++++++++++++++
>  lib/py/cli.py       |  4 +++
>  2 files changed, 76 insertions(+)
>  create mode 100644 lib/py/anonymize.py
> 
> diff --git a/lib/py/anonymize.py b/lib/py/anonymize.py
> new file mode 100644
> index 00000000..42861696
> --- /dev/null
> +++ b/lib/py/anonymize.py
> @@ -0,0 +1,72 @@
> +# This file contains methods to anonymize criu images.
> +
> +# In order to anonymize images three steps are followed:
> +#     - decode the binary image to json
> +#     - strip the necessary information from the json dict
> +#     - encode the json dict back to a binary image, which is now anonymized
> +
> +# The following contents are being anonymized:
> +#     - Paths to files
> +
> +import hashlib
> +
> +def files_anon(image):
> +    levels = {}
> +
> +    fname_key = 'reg'
> +    checksum  = hashlib.sha1()
> +
> +    for e in image['entries']:
> +        if fname_key in e:
> +            f_path = e[fname_key]['name']
> +
> +        f_path  = f_path.split('/')
> +        lev_num = 0
> +
> +        for i, p in enumerate(f_path):
> +            if p == '':
> +                continue
> +            if lev_num not in levels:
> +                levels[lev_num] = {}
> +            if p not in levels[lev_num]:
> +                if i == 1:
> +                    levels[lev_num][p] = p
> +                else:
> +                    checksum.update(p)
> +                    levels[lev_num][p] = checksum.hexdigest()
> +            lev_num += 1
> +
> +    for i, e in enumerate(image['entries']):
> +        if fname_key in e:
> +            f_path = e[fname_key]['name']
> +        
> +        if f_path == '/':
> +            continue
> +        
> +        f_path = f_path.split('/')
> +        lev_num = 0
> +
> +        for j, p in enumerate(f_path):
> +            if p == '':
> +                continue
> +            f_path[j] = levels[lev_num][p]
> +            lev_num += 1
> +        f_path = '/'.join(f_path)
> +        image['entries'][i][fname_key]['name'] = f_path
> +    
> +    return image
> +
> +anonymizers = {
> +    'FILES': files_anon
> +}
> +
> +def anon_handler(image):
> +    magic = image['magic']
> +
> +    if magic != 'FILES':
> +        return -1
> +    
> +    handler  = anonymizers[magic]
> +    anon_img = handler(image)
> +
> +    return anon_img
> diff --git a/lib/py/cli.py b/lib/py/cli.py
> index fdb24dbe..400c084e 100755
> --- a/lib/py/cli.py
> +++ b/lib/py/cli.py
> @@ -6,6 +6,7 @@ import os
>  import glob
>  
>  import pycriu
> +from anonymize import anon_handler
>  
>  def inf(opts):
>  	if opts['in']:
> @@ -286,6 +287,9 @@ def anonymize(opts):
>  
>  		try:
>  			img = pycriu.images.load(inf(inf_opts))
> +			anon_dict = anon_handler(img)
> +			if anon_dict != -1:
> +				pycriu.images.dump(anon_dict, outf(inf_opts))
>  		except pycriu.images.MagicException as exc:
>  			print("Unknown magic %#x.\n"\
>  					"Found a raw image" %exc.magic, file=sys.stderr)
> 




More information about the CRIU mailing list