view Generic/utils.py @ 26:16f91684686b default tip

Upgrade to python 3. Keep python 2/3 compatibility
author Miguel Ángel Bárcena Rodríguez <miguelangel@obraencurso.es>
date Tue, 18 Jun 2019 17:50:23 +0200
parents 65e7ae0d0e63
children
line wrap: on
line source

#!/usr/bin/python
# -*- coding: utf-8 -*-
## File utils.py
## This file is part of pyArq-Presupuestos.
##
## Copyright (C) 2010-2019 Miguel Ángel Bárcena Rodríguez
##                         <miguelangel@obraencurso.es>
##
## pyArq-Presupuestos is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## pyArq-Presupuestos is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Modules

# python 2/3 compatibility
from __future__ import absolute_import, division, print_function, unicode_literals
from builtins import str as text
from six import text_type
from io import IOBase

import re 
import imghdr
import os.path


# add svg to imghdr
def test_svg(h, f):
    """SVG """
    if  isinstance(f,IOBase):
        _pos = f.tell()
        f.seek(0)
        _h = f.read(32)
        f.seek(-32, 2)
        _l = f.read(32)
        f.seek(_pos)
    else:
        _h = h
        _l = h[-32:]
    if b"<?xml" in _h and b"</svg>" in  _l:
        return 'svg'
# Register the check: imghdr.what() tries the functions listed in imghdr.tests.
imghdr.tests.append(test_svg)

# add ico to imghdr
def test_ico(h, f):
    """ico image file"""
    if h[:4] == b"\x00\x00\x01\x00":
        return 'ico'
# Register the check: imghdr.what() tries the functions listed in imghdr.tests.
imghdr.tests.append(test_ico)

# add pdf to imghdr
def test_pdf(h, f):
    """pdf file"""
    if h[:4] == b"%PDF":
        return 'pdf'
# Register the check: imghdr.what() tries the functions listed in imghdr.tests.
imghdr.tests.append(test_pdf)

# add wmf to imghdr
def test_wmf(h, f):
    """wmf image library"""
    if h[:6] == b"\xd7\xcd\xc6\x9a\x00\x00":
        return 'wmf'
# Register the check: imghdr.what() tries the functions listed in imghdr.tests.
imghdr.tests.append(test_wmf)

# add dxf to imghdr
def test_dxf(h, f):
    """AutoCAD DXF: Drawing Interchange Format"""
    if  isinstance(f,IOBase):
        _pos = f.tell()
        f.seek(0)
        _h = f.read(128)
        f.seek(-32, 2)
        _l = f.read(32)
        f.seek(_pos)
    else:
        _h = h
        _l = h[-32:]
    _h = _h.replace(b"\r",b"")
    _l = _l.replace(b"\r",b"")
    if (b"  0\nSECTION\n  2\nHEADER\n" in _h or\
       b"  0\nSECTION\n  2\nCLASSES\n" in _h or\
       b"  0\nSECTION\n  2\nTABLES\n" in _h or\
       b"  0\nSECTION\n  2\nBLOCKS\n" in _h or\
       b"  0\nSECTION\n  2\nENTITIES\n" in _h or\
       b"  0\nSECTION\n  2\nOBJECTS\n" in _h or\
       b"  0\nSECTION\n  2\nTHUMBNAILIMAGE\n" in _h) and \
       _l[-19:] == b"  0\nENDSEC\n  0\nEOF\n":
        return 'dxf'
# Register the check: imghdr.what() tries the functions listed in imghdr.tests.
imghdr.tests.append(test_dxf)


def mapping(string, tuple_strings):
    """mapping(string, tuple)

    string: a message string
    tuple_strings: a tuple with the values to insert; items that are not
        text are converted to text
    Return the string replacing each $n word (n counted from 1) with its
    corresponding value from the tuple. A $n word without a corresponding
    value is left untouched.
    It is used because the gettext module can not support strings as:
        "Invalid type (%s) in record: %s" %(type, record)
    """
    _count = len(tuple_strings)
    if _count == 0:
        return string
    # Highest numbers first, so "$10" is not read as "$1" followed by "0".
    _numbers = "|".join(text(_number) for _number in range(_count, 0, -1))

    def _value(match):
        _item = tuple_strings[int(match.group(1)) - 1]
        if isinstance(_item, text):
            return _item
        return text(_item)

    # A single pass: "$n" text inside an inserted value is not replaced again,
    # and the function form keeps backslashes in the values literal.
    return re.sub("\\$(" + _numbers + ")", _value, string)

def eliminate_duplicates(list):
    """eliminate_duplicates(list)

    list: the values to filter
    Return a new list with the first occurrence of each value, in the
    original order. Values are compared by equality, so they do not need
    to be hashable.
    """
    _unique = []
    _keep = _unique.append
    for _value in list:
        if _value in _unique:
            continue
        _keep(_value)
    return _unique

def is_valid_code(code):
    """is_valid_code(code)

    code: a string code
    Function to test if a record code is valid
    A valid code must fulfill:
        - Be a not empty text string
        - The valid characters are the defined in MSdos 6.0 including .$#%&_
            What it means? I am not sure, so I test if all the characters
            are in cp850
        - Cannot contain the following characters
            tilde        separator of records in FIEBDC-3
            vertical bar separator of fields in FIEBDC-3
            backslash    separator of subfields in FIEBDC-3
            tab          control character
            space        control character
            line feed and carriage return, end of line control characters
        - Cannot end with # or ##, root and chapter code record
    A message is printed for each problem found.
    It returns a tuple (is_valid, code)
        is_valid (True/False)
            True: the code is valid
            False: the code is not valid
        code(False/code)
            False: the code is not valid and can not be corrected
            code: the code or the corrected code
    """
    if not isinstance(code, text_type):
        _tuni = _("Not a text string, code: $1, type: $2")
        # mapping() inserts text only: show the value through repr(), which
        # works whatever the type of code is
        _uni = mapping(_tuni, (text(repr(code)), text(type(code))))
        print(_uni)
        return False, False
    if code == "":
        return False, False
    _is_valid = True
    try:
        _code_utf8 = code.encode("utf-8", 'replace')
        # Round trip through cp850: the characters outside cp850 come back
        # replaced, so the result differs from the original code
        _code_cp850 = code.encode("cp850", 'replace')
        _unicode_code = text(_code_cp850, "cp850", 'replace')
    except UnicodeError:
        _tuni = _("Unicode Error, code: $1")
        _uni = mapping(_tuni, (code, ))
        print(_uni)
        return False, False
    if code != _unicode_code:
        _tuni = _("Not in cp850, code: $1")
        _uni = mapping(_tuni, (code, ))
        print(_uni)
        _is_valid = False
        # Keep working with text: the checks below compare and substitute
        # text, and fail if code is rebound to the utf-8 bytes.
        # NOTE(review): the characters outside cp850 are kept in the
        # returned code, as the utf-8 value did; confirm this is intended.
        code = text(_code_utf8, "utf-8", 'replace')
    # raw string: the class holds tab, space, line feed, carriage return,
    # tilde, vertical bar and backslash
    _code2 = re.sub(r"[\t \n\r~|\\]", "", code)
    if _code2 != code:
        _tuni = _("Control characters in code: $1")
        _uni = mapping(_tuni, (code, ))
        print(_uni)
        if _code2 == "":
            return False, False
        _is_valid = False
        code = _code2
    if code.endswith("#"):
        _tuni = _("# in code: $1")
        _uni = mapping(_tuni, (code, ))
        print(_uni)
        _is_valid = False
        # rstrip() also copes with a code made only of "#", where indexing
        # the emptied string raised IndexError
        code = code.rstrip("#")
    if code == "":
        _tuni = _("Empty code")
        print(_tuni)
        return False, False
    return _is_valid, code

def getFiletype(filename, h=None):
    """getFiletype(filename, h=None):

    filename: the filename to test, its extension is always checked
    h: raw byte string or None, it is passed with filename to imghdr.what
    to detect the type from the content

    The video, office, html, rtf and txt types are decided by the extension
    alone. The image, wmf, dxf and pdf types need both the detected content
    type and the extension to agree.
    Return one of the valid types, or None if the type is not recognised.

    valid types:
     "image", "wmf", "dxf", "pdf" , "video",
     "office-document", "office-presentation", "office-spreadsheet",
     "html", "rtf", "txt"
    """
    _suffix = os.path.splitext(filename)[1][1:].lower()

    # types recognised by the extension, without reading any content
    _by_extension = (
        ("video",
         ("avi", "mp4", "m4p", "m4v2", "m4v", "amv", "mpg", "m2v",
          "mp2", "mpe", "mpv", "mpeg", "ogg", "ogv", "webm", "mkv",
          "ogm", "flv", "f4v", "f4p", "f4a", "f4b", "vob", "drc",
          "mts", "m2ts", "mov", "qt", "wmv", "yuv", "rm", "rmvb",
          "asf", "svi", "3gp", "3g2", "mxf", "roq", "nsv")),
        ("office-document", ("doc", "docx", "odt")),
        ("office-spreadsheet", ("xls", "xlsx", "ods")),
        ("office-presentation", ("pps", "ppsx", "ppt", "pptx", "odp")),
        ("html", ("html", "xhtml")),
        ("rtf", ("rtf",)),
        ("txt", ("txt",)),
    )
    for _kind, _extensions in _by_extension:
        if _suffix in _extensions:
            return _kind

    # types recognised by the content, confirmed by the extension
    _detected = imghdr.what(filename, h)
    _images = ("rgb", "gif", "pbm", "pgm", "ppm", "tiff", "tif", "rast",
               "xbm", "jpeg", "jpg", "bmp", "png", "webp", "exr",
               "ico", "svg")
    if _detected in _images and _suffix in _images:
        return "image"
    for _kind in ("wmf", "dxf", "pdf"):
        if _detected == _kind and _suffix == _kind:
            return _kind
    return None