#!/usr/bin/env python3
""" Call validation code """

from pathlib import Path

import requests

import ujson as json


class ValidatorHelper:
    """ Help validating tabular data

    Keeps a class-level registry of schemas, filled by `init` and queried
    through the other classmethods (every method here is a classmethod).
    """

    # schema code -> schema description: a copy of the info given to init(),
    # plus the downloaded 'schema' and optional 'custom_checks' JSON content
    schema_dict = {}

    # Directory where downloaded JSON is cached; None disables caching.
    # Declared at class level so json_download() works before init() is called.
    cache_dir = None

    # Seconds to wait for the remote server before giving up on a download;
    # without a timeout requests may block forever on an unresponsive host.
    download_timeout = 30

    @classmethod
    def init(cls, schema_info, cache_dir: Path):
        """ Register and download schema and custom_checks info

        schema_info: mapping of schema code -> dict holding 'schema_json_url'
            and optionally 'custom_checks_json_url' (other keys are kept as is)
        cache_dir: directory used to cache downloaded JSON, or None to
            download every time
        """
        cls.schema_dict = {}
        cls.cache_dir = cache_dir
        for code, info in schema_info.items():
            print('Downloading schema {}'.format(code))
            # Work on a copy so that the caller's dict is not modified
            schema = info.copy()

            # schema download
            schema['schema'] = cls.json_download(
                schema['schema_json_url'], '{}_schema.json'.format(code))

            # custom_checks are optional
            if 'custom_checks_json_url' in schema:
                schema['custom_checks'] = cls.json_download(
                    schema['custom_checks_json_url'],
                    '{}_custom_checks.json'.format(code))

            cls.schema_dict[code] = schema

    @classmethod
    def json_download(cls, url, filename):
        """ Download url content as JSON

        When a cache directory is configured, the content is read from
        `filename` in that directory if the file exists; otherwise it is
        downloaded and written there for later calls.

        Raises the requests exception on network failure, timeout or HTTP
        error status.
        """
        if cls.cache_dir is None:
            return cls._fetch_json(url)

        cache_file = Path(cls.cache_dir) / filename
        if cache_file.exists():
            with cache_file.open('rt', encoding='utf-8') as fd:
                return json.load(fd)

        data = cls._fetch_json(url)
        # The cache directory may not exist yet on first run
        cache_file.parent.mkdir(parents=True, exist_ok=True)
        with cache_file.open('wt', encoding='utf-8') as fd:
            json.dump(data, fd, ensure_ascii=False, sort_keys=True, indent=2)
        return data

    @classmethod
    def _fetch_json(cls, url):
        """ Fetch url and return its decoded JSON body """
        response = requests.get(url, timeout=cls.download_timeout)
        # Fail loudly on 4xx/5xx instead of parsing (and caching) an error page
        response.raise_for_status()
        return response.json()

    @classmethod
    def schema_exist(cls, schema_code):
        """ Checks if schema exists """
        return schema_code in cls.schema_dict

    @classmethod
    def schema_info(cls, schema_code):
        """ Return schema info from code

        Returns None if the code is unknown, else a dict merging, in order of
        increasing precedence: the downloaded schema top-level keys except
        'fields', a 'code' entry, and the registered keys whose name starts
        neither with 'schema' nor with 'custom_checks'.
        """
        if not cls.schema_exist(schema_code):
            return None

        entry = cls.schema_dict[schema_code]

        # Schema keys but 'fields'
        schema_meta = {k: v for k, v in entry['schema'].items() if k != 'fields'}

        # All registered keys but schema* and custom_checks*
        extra = {k: v for k, v in entry.items()
                 if not k.startswith(('schema', 'custom_checks'))}

        return {**schema_meta, 'code': schema_code, **extra}

    @classmethod
    def schema_info_list(cls):
        """ Computes and return schema info list, sorted by schema code """
        return [cls.schema_info(code) for code in sorted(cls.schema_dict)]

    @classmethod
    def validate_url(cls, schema_code, url):
        """ TODO: validate URL against schema """
        print('Validate url [{}] against schema {}'.format(url, schema_code))

    @classmethod
    def validate_file(cls, schema_code, file):
        """ TODO: validate file against schema """
        print('Validate file [{}] against schema {}'.format(file, schema_code))