[CRIU] [PATCH 3/3] Issue #360: Anonymize image files

Pavel Emelianov xemul at virtuozzo.com
Mon Jun 24 15:00:16 MSK 2019


On 6/22/19 12:37 PM, Harshavardhan Unnibhavi wrote:
> This commit adds the file anonymizer function which anonymizes file names present in images.
> 
> The anonymized file names are just the shuffled names along the path from root.
> 
> Signed-off-by: Harshavardhan Unnibhavi <hvubfoss at gmail.com>
> ---
>  lib/py/cli.py   |  9 ++++++-
>  lib/py/strip.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 74 insertions(+), 1 deletion(-)
>  create mode 100644 lib/py/strip.py
> 
> diff --git a/lib/py/cli.py b/lib/py/cli.py
> index 17622fd2..4a8efeff 100755
> --- a/lib/py/cli.py
> +++ b/lib/py/cli.py
> @@ -5,6 +5,7 @@ import json
>  import os
>  
>  import pycriu
> +import strip
>  
>  def inf(opts):
>  	if opts['in']:
> @@ -281,15 +282,21 @@ def anonymize(opts):
>  	img_files = os.listdir(opts['in'])
>  
>  	for i in img_files:
> -		temp = {'in':os.path.join(opts['in'], i)}
> +		temp = {'in':os.path.join(opts['in'], i), 'out':os.path.join(opts['out'], i)}
>  
>  		try:
>  			m, img = pycriu.images.load(inf(temp), anon_info = True)
> +			print("Processing File name:{} with magic:{}".format(i, m))
>  		except pycriu.images.MagicException as exc:
>  			print("Unknown magic %#x.\n"\
>  					"Found a raw image, continuing ..."% exc.magic, file=sys.stderr)
>  			continue
>  		
> +		anon_dict = strip.anon_handler(img, m)
> +		if anon_dict != -1:
> +			pycriu.images.dump(anon_dict, outf(temp))

A message about skipping the file is needed.

> +		
> +		
>  
>  explorers = { 'ps': explore_ps, 'fds': explore_fds, 'mems': explore_mems, 'rss': explore_rss }
>  
> diff --git a/lib/py/strip.py b/lib/py/strip.py
> new file mode 100644
> index 00000000..4069275c
> --- /dev/null
> +++ b/lib/py/strip.py
> @@ -0,0 +1,66 @@
> +# This file contains methods to deal with anonymising images.
> +#
> +# Contents being anonymised can be found at: https://github.com/checkpoint-restore/criu/issues/360
> +#
> +# Inorder to anonymise the image files three steps are followed:
> +#    - decode the binary image to json
> +#    - strip the necessary information from the json dict
> +#    - encode the json dict back to a binary image, which is now anonymised
> +
> +import sys
> +import json
> +import random
> +
> +def files_anon(image):
> +    levels = {}
> +
> +    for e in image['entries']:
> +        f_path = e['reg']['name']
> +        f_path = f_path.split('/')
> +
> +        lev_num = 0
> +        for p in f_path:
> +            if p == '':
> +                continue
> +            if lev_num in levels.keys():
> +                if p not in levels[lev_num].keys():
> +                    temp = list(p)
> +                    random.shuffle(temp)

Erm, I'm not 100% it's OK to anonymize file paths like that.

> +                    levels[lev_num][p] = ''.join(temp)
> +            else:
> +                levels[lev_num] = {}
> +                temp = list(p)
> +                random.shuffle(temp)
> +                levels[lev_num][p] = ''.join(temp)

Can we factor out these two branches a bit? Smth like

            if lev_num not in levels.keys():
                levels[lev_num] = {}
            if p not in levels[lev_num].keys():
                temp = list(p)
                random.shuffle(temp)
                levels[lev_num][p] = ''.join(temp)

> +            lev_num += 1
> +        
> +    for i, e in enumerate(image['entries']):
> +        f_path = e['reg']['name']
> +        if f_path == '/':
> +            continue
> +        f_path = f_path.split('/')
> +
> +        lev_num = 0
> +        for j, p in enumerate(f_path):
> +            if p == '':
> +                continue
> +            f_path[j] = levels[lev_num][p]
> +            lev_num += 1
> +        f_path = '/'.join(f_path)
> +        image['entries'][i]['reg']['name'] = f_path
> +    
> +    return image
> +        
> +
> +
> +
> +anonymizers = {
> +    'FILES': files_anon,
> +    }
> +

Please, run lint tool on this file, I'm afraid this coding style is not correct. The config
file for lint is in scripts/flake8.cfg.

> +def anon_handler(image, magic):
> +    if magic != 'FILES':
> +        return -1
> +    handler = anonymizers[magic]
> +    anon_image = handler(image)
> +    return anon_image
> \ No newline at end of file
> 




More information about the CRIU mailing list