Mercurial > pyarq-presupuestos

diff Generic/fiebdc.py @ 17:a7b9f7e7dfa4
Improvements importing FIEBDC files
author: Miguel Ángel Bárcena Rodríguez <miguelangel@obraencurso.es>
date: Sat, 02 Nov 2013 19:26:09 +0100
parents: 0359329a1c26
children: 878159a13494
--- a/Generic/fiebdc.py	Fri Apr 19 00:04:20 2013 +0200
+++ b/Generic/fiebdc.py	Sat Nov 02 19:26:09 2013 +0100
@@ -3,7 +3,7 @@
 ## File fiebdc.py
 ## This file is part of pyArq-Presupuestos.
 ##
-## Copyright (C) 2010 Miguel Ángel Bárcena Rodríguez
+## Copyright (C) 2010-2013 Miguel Ángel Bárcena Rodríguez
 ##                         <miguelangel@obraencurso.es>
 ##
 ## pyArq-Presupuestos is free software: you can redistribute it and/or modify
@@ -26,7 +26,8 @@
 import re
 import calendar
 import os.path
-
+import unicodedata
+import hashlib
 # pyArq-Presupuestos modules
 import base
 from Generic import utils
@@ -97,24 +98,24 @@
         self.__generator = globalVars.version
         self.__character_set = "850"
         self.__pattern = {
-            "control_tilde" : re.compile("((\r\n)| |\t)+~"),
-            "control_vbar" : re.compile("((\r\n)| |\t)+\|"),
-            "control_backslash" : re.compile(r"((\r\n)| |\t)+\\"),
-            "valid_code" : re.compile("[^A-Za-z0-9ñÑ.$#%&_]"),
-            "special_char": re.compile("[#%&]"),
-            "no_float": re.compile("[^0-9.]"),
-            "formula" : re.compile(".*[^0123456789\.()\+\-\*/\^abcdp ].*"),
-            "comment": re.compile("#.*\r\n"),
-            "empty_line": re.compile(r"(\r\n) *\r\n"),
-            "space_before_backslash" : re.compile(r"( )+\\"),
-            "space_after_backslash" : re.compile(r"\\( )+"),
-            "start_noend_backslash" : re.compile("(\r\n\\\.*[^\\\])\r\n"),
-            "end_oper": re.compile("(\+|-|\*|/|/^|@|&|<|>|<=|>=|=|!) *\r\n"),
-            "matricial_var" : re.compile("(\r\n *[%|\$][A-ZÑ].*=.*,) *\r\n"),
-            "descomposition" : re.compile("^([^:]+):(.*)$"),
-            "var" : re.compile("^([$%][A-ZÑ][()0-9, ]*)=(.*)$"),
-            "after_first_tilde" : re.compile("^[^~]*~"),
-            "end_control" : re.compile("((\r\n)| |\t)+$"),
+            "control_tilde" : re.compile(u"((\r\n)| |\t)+~"),
+            "control_vbar" : re.compile(u"((\r\n)| |\t)+\|"),
+            "control_backslash" : re.compile(ur"((\r\n)| |\t)+\\"),
+            "valid_code" : re.compile(u"[^A-Za-z0-9ñÑ.$#%&_]"),
+            "special_char": re.compile(u"[#%&]"),
+            "no_float": re.compile(u"[^0-9.]"),
+            "formula" : re.compile(u".*[^0123456789\.()\+\-\*/\^abcdp ].*"),
+            "comment": re.compile(u"#.*\r\n"),
+            "empty_line": re.compile(ur"(\r\n) *\r\n"),
+            "space_before_backslash" : re.compile(ur"( )+\\"),
+            "space_after_backslash" : re.compile(ur"\\( )+"),
+            "start_noend_backslash" : re.compile(u"(\r\n\\\.*[^\\\])\r\n"),
+            "end_oper": re.compile(u"(\+|-|\*|/|/^|@|&|<|>|<=|>=|=|!) *\r\n"),
+            "matricial_var" : re.compile(u"(\r\n *[%|\$][A-ZÑ].*=.*,) *\r\n"),
+            "descomposition" : re.compile(u"^([^:]+):(.*)$"),
+            "var" : re.compile(u"^([$%][A-ZÑ][()0-9, ]*)=(.*)$"),
+            "after_first_tilde" : re.compile(u"^[^~]*~"),
+            "end_control" : re.compile(u"((\r\n)| |\t)+$"),
             }
 
     def cancel(self):
@@ -134,9 +135,9 @@
         record ~P
         """
         # "control_tilde" : "((\r\n)| |\t)+~"
-        string = self.__pattern["control_tilde"].sub("~",string)
+        string = self.__pattern["control_tilde"].sub(u"~",string)
         # "control_vbar" : "((\r\n)| |\t)+\|"
-        string = self.__pattern["control_vbar"].sub("|",string)
+        string = self.__pattern["control_vbar"].sub(u"|",string)
         # "control_backslash" : r"((\r\n)| |\t)+\\"
         #string = self.__pattern["control_backslash"].sub(r"\\",string)
         return string
@@ -147,31 +148,44 @@
         Test if the code have invalid characters and try to erase it,
         if it is posible return a valid code else return a empty string.
         """
-        if not isinstance(code, str):
-            print _("Invalid code, it must be a string")
-            return ""
-        # Valid chararcter: A-Z a-z 0-9 ñ Ñ . $ # % & _
+        if not isinstance(code, unicode):
+            print _("Invalid code, it must be a unicode string")
+            return u""
+        # Valid characters: A-Z a-z 0-9 ñ Ñ . $ # % & _
         # "valid_code" : "[^A-Za-z0-9ñÑ.$#%&_]"
-        _code = self.__pattern["valid_code"].sub("", code)
-        if _code != code:
-            print utils.mapping(_("The code '$1' have invalid characters."),
-                               (code,))
-            code = _code
+        _ucode = self.__pattern["valid_code"].sub(u"", code)
+        if _ucode != code:
+            try:
+                print utils.mapping(_("The code '$1' have invalid characters."),
+                               (code.encode("utf8"),))
+            except:
+                print utils.mapping(_("The code '$1' have invalid characters and can not be encoded in utf8."), (code,))
+            
+            if len(_ucode) == 0:
+                _normalize_code = ''.join((c for c in unicodedata.normalize('NFD', _ucode) if unicodedata.category(c) != 'Mn'))
+                # NFD-decompose and drop combining marks ('Mn') to strip accents; technique from http://www.leccionespracticas.com/uncategorized/eliminar-tildes-con-python-solucionado/
+                _ucode = self.__pattern["valid_code"].sub(u"", _normalize_code)
+                if len(_ucode) == 0:
+                    _hash_code = hashlib.sha256()
+                    _hash_code.update(code.encode('utf-8'))
+                    _hexdigest_code = _hash_code.hexdigest()
+                    _ucode = self.__pattern["valid_code"].sub(u"", _hexdigest_code)
+            code = _ucode
         # the lasts characters can not be <#> or <##>
         # <##> -> root record in FIEFDC-3
         # <#> -> chapter record in FIEFDC-3
         if len(code) > 0:
-            while code[-1] == "#":
+            while code[-1] == u"#":
                 code = code[:-1]
             if len(code) > 20:
                 code = code[:20]
             # only one charecter # % or &
-            if sum([code.count(c) for c in '#%&']) > 1:
+            if sum([code.count(c) for c in u'#%&']) > 1:
                 print utils.mapping(_("The code '$1' contains special "\
-                                      "characters repeated."),(code,))
-                _i = min([code.find(c) for c in '#%&'])
+                                      "characters repeated."),(code.encode("utf8"),))
+                _i = min([code.find(c) for c in u'#%&'])
                 code = code[:_i+1] + \
-                        self.__pattern["special_char"].sub("", code[_i+1:])
+                        self.__pattern["special_char"].sub(u"", code[_i+1:])
         return code
 
     def parseDate(self, date):
@@ -187,11 +201,11 @@
         or None if the date format is invalid
         """
         # All characters must be numbers, len <= 8 and not empty string
-        if not date.isdigit() or len(date) > 8 or date == "":
+        if not date.isdigit() or len(date) > 8 or date == u"":
             return None
         else:
             if len(date)%2 == 1: # uneven len: add a leading 0
-                date = "0" + date
+                date = u"0" + date
             if len(date) == 8:
                 _d = int(date[:2])
                 _m = int(date[2:4])
@@ -307,66 +321,66 @@
         # TODO:  ~P. Registro tipo Descripción Paramétrica.
         # TODO:  ~O. Registro tipo Relación Comercial.
         # TODO: test records
-        _field_list = record.split("|")
+        _field_list = record.split(u"|")
         self._record_number = self._record_number +1
         _budget = self.__budget
-        if _field_list[0] == "V":
+        if _field_list[0] == u"V":
             self._record_V_number += 1
             self._parseV(_field_list)
-        elif _field_list[0] == "C":
+        elif _field_list[0] == u"C":
             self._record_C_number += 1
             self._parseC(_field_list)
-        elif _field_list[0] == "D":
+        elif _field_list[0] == u"D":
             self._record_D_number += 1
             self._parseDY(_field_list)
-        elif _field_list[0] == "Y":
+        elif _field_list[0] == u"Y":
             self._record_Y_number += 1
             self._parseDY(_field_list)
-        elif _field_list[0] == "M":
+        elif _field_list[0] == u"M":
             self._record_M_number += 1
             self._parseMN(_field_list)
-        elif _field_list[0] == "N":
+        elif _field_list[0] == u"N":
             self._record_N_number += 1
             self._parseMN(_field_list)
-        elif _field_list[0] == "T":
+        elif _field_list[0] == u"T":
             self._record_T_number += 1
             self._parseT(_field_list)
-        elif _field_list[0] == "K":
+        elif _field_list[0] == u"K":
             self._record_K_number += 1
             self._parseK(_field_list)
-        elif _field_list[0] == "W":
+        elif _field_list[0] == u"W":
             self._record_W_number += 1
             self._parseW(_field_list)
-        elif _field_list[0] == "L":
+        elif _field_list[0] == u"L":
             self._record_L_number += 1
             self._parseL(_field_list)
-        elif _field_list[0] == "Q":
+        elif _field_list[0] == u"Q":
             self._record_Q_number += 1
             self._parseQ(_field_list)
-        elif _field_list[0] == "J":
+        elif _field_list[0] == u"J":
             self._record_J_number += 1
             self._parseJ(_field_list)
-        elif _field_list[0] == "G":
+        elif _field_list[0] == u"G":
             self._record_G_number += 1
             self._parseG(_field_list)
-        elif _field_list[0] == "E":
+        elif _field_list[0] == u"E":
             self._record_E_number += 1
             self._parseE(_field_list)
         elif _field_list[0] == "O":
             self._record_O_number += 1
-        elif _field_list[0] == "P":
+        elif _field_list[0] == u"P":
             self._record_P_number += 1
             self._parseP(_field_list)
-        elif _field_list[0] == "X":
+        elif _field_list[0] == u"X":
             self._record_X_number += 1
             self._parseX(_field_list)
-        elif _field_list[0] == "B":
+        elif _field_list[0] == u"B":
             self._record_B_number += 1
             self._parseB(_field_list)
-        elif _field_list[0] == "F":
+        elif _field_list[0] == u"F":
             self._record_F_number += 1
             self._parseF(_field_list)
-        elif _field_list[0] == "A":
+        elif _field_list[0] == u"A":
             self._record_A_number += 1
             self._parseA(_field_list)
         else:
@@ -401,7 +415,7 @@
         # If there are no sufficient fields, the fields are added
         # with empty value:""
         else:
-            field_list = field_list + [""]*(10-len(field_list))
+            field_list = field_list + [u""]*(10-len(field_list))
         # control character are erased: end of line, tab, space
         # only leading and trailing whitespace in owner, generator, comment
         # _____Fields_____
@@ -414,42 +428,45 @@
         _header_title = field_list[4].strip()
         _header_title = self.delete_control(_header_title)
         _character_set = self.delete_control_space(field_list[5])
-        _comment = field_list[6].strip("\t \n\r")
+        _comment = field_list[6].strip(u"\t \n\r")
         _data_type = self.delete_control_space(field_list[7])
         _number_certificate = self.delete_control_space(field_list[8])
         __date_certificate = self.delete_control_space(field_list[9])
         # _____Owner_____
         self.__budget.setOwner(_owner)
         # _____Version-Date_____
-        _version_date = _version_date.split("\\")
+        _version_date = _version_date.split(u"\\")
         _file_format = _version_date[0]
         if _file_format in self.__format_list:
             self.__file_format = _file_format
-            print _("FIEBDC format: %s" % _file_format)
+            print utils.mapping(_("FIEBDC format: $1"),(_file_format,))
+
         if len(_version_date) > 1:
             _date = _version_date[1]
-            if _date != "":
+            if _date != u"":
                 _parsed_date = self.parseDate(_date)
                 if _parsed_date is not  None:
                     self.__budget.setDate(_parsed_date)
         # _____Generator_____
         # ignored field
-        print _("FIEBDC file generated by %s" % _generator)
+        print utils.mapping(_("FIEBDC file generated by $1"),(_generator,))
         # _____Header_Title_____
-        _header_title = _header_title.split("\\")
+        _header_title = _header_title.split(u"\\")
         _header_title = [_title.strip() for _title in _header_title]
         _header = _header_title.pop(0)
+        _header = [_item.encode("utf8") for _item in _header]
         _title = [ ]
         for _title_index in _header_title:
-            if _title_index != "":
+            if _title_index != u"":
                 _title.append(_title_index)
-        if _header != "":
-            self.__budget.setTitleList([ _header, _title ])
+        _title = [_item.encode("utf8") for _item in _title]
+        if _header != u"":
+            self.__budget.setTitleList([ _header, _title])
         # _____Characters_set_____
         # field parsed in readFile method
         # _____Comment_____
-        if _comment != "":
-            self.__budget.setComment(_comment)
+        if _comment != u"":
+            self.__budget.setComment(_comment.encode("utf8"))
         # _____Data type_____
         # 1 -> Base data.
         # 2 -> Budget.
@@ -508,16 +525,16 @@
         _field1 = self.delete_control_space(field_list[1])
         _field2 = self.delete_control_space(field_list[2])
         # _____Field 1_____
-        if len(_field1) > 0 and _field1[-1] == "\\":
+        if len(_field1) > 0 and _field1[-1] == u"\\":
             _field1 = _field1[:-1]
             # if there are a \ character at the end it must be erased
-        _percentages = _field1.split("\\")
+        _percentages = _field1.split(u"\\")
         if len(_percentages) > 5:
             _percentages = _percentages[:5]
         # If there are no sufficient subfields, the subfields are added
         # with empty value:""
         else:
-            _percentages = _percentages + [""]*(5-len(_percentages))
+            _percentages = _percentages + [u""]*(5-len(_percentages))
         _percentage_titles = [ "CI", "GG", "BI", "BAJA", "IVA" ]
         _percentage_dict = {}
         for _percentage_index in range(len(_percentages)):
@@ -534,12 +551,12 @@
         _title_num = len(self.__budget.getTitleList()[1])
         if _title_num == 0: _title_num = 1
         # If the field 2 is empty, the field 0 is readed
-        if _field2 == "":
+        if _field2 == u"":
             # _____Field 0_____
-            if _field0[-1] == "\\":
+            if _field0[-1] == u"\\":
                 _field0 = _field0[:-1]
                 # if there are a \ character at the end it must be erased
-            _decimal_list = _field0.split("\\")
+            _decimal_list = _field0.split(u"\\")
             _decimal_index = 0
             if len(_decimal_list)%9 != 0:
                 # if it is not multiple of 9, empty subfield are added
@@ -740,7 +757,7 @@
         # If there are no sufficient fields, the fields are added
         # with empty value:""
         else:
-            field_list = field_list + [""]*(7-len(field_list))
+            field_list = field_list + [u""]*(7-len(field_list))
         # control character are erased: en of line, tab, space 
         # _____Fields_____
         _record_type = field_list[0]
@@ -751,13 +768,13 @@
         _dates = self.delete_control_space(field_list[5])
         _type = self.delete_control_space(field_list[6])
         # _____Code_____
-        _codes = _codes.split("\\")
+        _codes = _codes.split(u"\\")
         if len(_codes) > 0:
             # parse the hierarchy of the first code
             # hierarchy: 0->root, 1->Chapter/subchapter, 2->other
-            if len(_codes[0]) > 2 and _codes[0][-2:] == "##":
+            if len(_codes[0]) > 2 and _codes[0][-2:] == u"##":
                 _hierarchy = 0
-            elif len(_codes[0]) > 1 and _codes[0][-1:] == "#":
+            elif len(_codes[0]) > 1 and _codes[0][-1:] == u"#":
                 _hierarchy = 1
             else:
                 _hierarchy = 2
@@ -766,12 +783,12 @@
             # maximun len 20 characters
             _codes = [self.validateCode(_code) for _code in _codes]
         # empty codes are ignored
-        while "" in _codes:
-            _codes.remove("")
+        while u"" in _codes:
+            _codes.remove(u"")
         if len(_codes) > 0:
             #TODO: test this
             _code = _codes[0]
-            _synonyms = _codes
+            _synonyms = [synonym.encode("utf8") for synonym in _codes]
         else:
             print _("Record C without a valid code")
             return
@@ -781,12 +798,12 @@
         # nothing to do
         # _____Price_____ and _____Dates_____
         # last \ is erased
-        if len(_dates) > 0 and _dates[-1] == "\\":
+        if len(_dates) > 0 and _dates[-1] == u"\\":
             _dates = _dates[:-1]
-        if len(_prices) > 0 and _prices[-1] == "\\":
+        if len(_prices) > 0 and _prices[-1] == u"\\":
             _prices = _prices[:-1]
-        _dates = _dates.split("\\")
-        _prices = _prices.split("\\")
+        _dates = _dates.split(u"\\")
+        _prices = _prices.split(u"\\")
         # number of prices = number of titles in "V" line
         # if there are no sufficient prices it takes the last price defined
         _title_num = len(self.__budget.getTitleList()[1])
@@ -842,59 +859,60 @@
         #                    2 -> None,Q,%
         #                    3 -> None,MC,MCr,MM,MS,ME,MCu,Mal,ML,M
         if _hierarchy == 0:
-            if _type == "OB":
+            if _type == u"OB":
                 _subtype = _type
                 _type = 0
-            elif _type == "0" or _type == "":
-                _subtype = ""
+            elif _type == u"0" or _type == u"":
+                _subtype = u""
                 _type = 0
             else:
                 print utils.mapping(_("Incorrect type ($1) in the code $2"),
-                      (str(_type), _code))
+                      (_type.encode("utf8"), _code.encode("utf8")))
                 _type = 0
-                _subtype = ""
+                _subtype = u""
         elif _hierarchy == 1:
-            if _type == "PU":
+            if _type == u"PU":
                 _subtype = _type
                 _type = 0
-            elif _type == "0" or _type == "":
-                _subtype = ""
+            elif _type == u"0" or _type == u"":
+                _subtype = u""
                 _type = 0
             else:
                 print utils.mapping(_("Incorrect type ($1) in the code $2"),
-                      (str(_type), _code))
+                      (_type.encode("utf8"), _code.encode("utf8")))
                 _type = 0
-                _subtype = ""
+                _subtype = u""
         else:
-            if _type == "EA" or _type == "EU" or _type == "EC" or \
-               _type == "EF" or _type == "PA":
+            if _type == u"EA" or _type == u"EU" or _type == u"EC" or \
+               _type == u"EF" or _type == u"PA":
                 _subtype = _type
                 _type = 0
-            elif _type == "H":
+            elif _type == u"H":
                 _subtype = _type
                 _type = 1
-            elif _type == "Q" or _type == "%":
+            elif _type == u"Q" or _type == u"%":
                 _subtype = _type
                 _type = 2
-            elif _type == "MC" or _type == "MCr" or _type == "MM" or \
-                 _type == "MS" or _type == "ME" or _type == "MCu" or \
-                 _type == "Mal" or _type == "ML" or _type == "M":
+            elif _type == u"MC" or _type == u"MCr" or _type == u"MM" or \
+                 _type == u"MS" or _type == u"ME" or _type == u"MCu" or \
+                 _type == u"Mal" or _type == u"ML" or _type == u"M":
                 _subtype = _type
                 _type = 3
-            elif _type == "0" or _type == "1" or _type == "2" or \
-                 _type == "3":
-                _subtype = ""
+            elif _type == u"0" or _type == u"1" or _type == u"2" or \
+                 _type == u"3":
+                _subtype = u""
                 _type = int(_type)
-            elif _type == "":
-                _subtype = ""
+            elif _type == u"":
+                _subtype = u""
                 _type = 0
             else:
                 print utils.mapping(_("Incorrect type ($1) in the code $2"),
-                      (str(_type), _code))
+                      (_type.encode("utf8"), _code.encode("utf8")))
                 _type = 0
-                _subtype = ""
-        self.__budget.setRecord(_code, _synonyms, _hierarchy,
-            _unit, _summary, _prices, _dates, _type, _subtype)
+                _subtype = u""
+        self.__budget.setRecord(_code.encode("utf8"), _synonyms, _hierarchy,
+            _unit.encode("utf8"), _summary.encode("utf8"),
+            _prices, _dates, _type, _subtype.encode("utf8"))
         self.num_valid_record = self.num_valid_record + 1
     
     def _parseDY(self, field_list):
@@ -913,7 +931,7 @@
         # If there are no sufficient fields, the fields are added
         # with empty value:""
         else:
-            field_list = field_list + [""]*(3-len(field_list))
+            field_list = field_list + [u""]*(3-len(field_list))
         # control character are erased: end of line, tab, space 
         # _____Fields_____
         _record_type = field_list[0]
@@ -922,10 +940,10 @@
         # _____Code_____
         # "#" and "##" characters at the end of the code are erased
         # invalid characters are also erased
-        _code = self.validateCode(_code) 
+        _code = self.validateCode(_code)
         # _____children_____
         # TODO: test the number of decimals in factor an yield values
-        _children = _children.split( "\\" )
+        _children = _children.split(u"\\")
         _children_list = [ ]
         _child_index = 0
         while _child_index < len(_children)-3:
@@ -936,7 +954,7 @@
             # _____child_code_____
             _child_code = self.validateCode(_child_code)
             # _____factor_____
-            if _factor != "":
+            if _factor != u"":
                 try:
                     _factor = float(_factor)
                 except ValueError:
@@ -944,10 +962,10 @@
                           "descomposition of the record $1, the factor "\
                           "of the child $2 must be a float number and "\
                           "can not be $3, seted default value 1.0"),
-                          (_code, _child_code, _factor))
+                          (_code.encode("utf8"), _child_code.encode("utf8"), _factor.encode("utf8")))
                     _factor = 1.0
             #____yield___
-            if _yield != "":
+            if _yield != u"":
                 try:
                     _yield = float(_yield)
                 except ValueError:
@@ -955,15 +973,15 @@
                           "descomposition of the record $1, the yield of "\
                           "the child $2, must be a float number and can"\
                           "not be $3,  seted default value 1.0"),
-                           (_code, _child_code, _factor))
+                           (_code.encode("utf8"), _child_code.encode("utf8"), _factor.encode("utf8")))
                     _yield = 1.0
-            if _child_code != "" and _code != "":
+            if _child_code != u"" and _code != u"":
                 _children_list.append([_child_code, _factor, _yield ])
-            if _record_type == "D":
+            if _record_type == u"D":
                 _position = _child_index / 3
             else: #_record_type == "Y"
                 _position = -1
-            self.__budget.setTree(_code, _child_code, _position, _factor, 
+            self.__budget.setTree(_code.encode("utf8"), _child_code.encode("utf8"), _position, _factor, 
                 _yield, "", "", "", "")
             _child_index = _child_index + 3
         self.num_valid_record = self.num_valid_record +1
@@ -993,7 +1011,7 @@
         # invalid characters are also erased
         _code = self.validateCode(_code) 
         # _____Text_____
-        self.__budget.setText(_code, _text)
+        self.__budget.setText(_code.encode("utf8"), _text.encode("utf8"))
         self.num_valid_record = self.num_valid_record + 1
 
     def _parseMN(self, field_list):
@@ -1007,7 +1025,6 @@
             4- {Type\Comment\Unit\Length\Width\Height\}
             5- [Label]
         """
-
         # _____Number of fields_____
         # Any INFORMATION after last field separator is ignored
         # The record must have 6 fields
@@ -1016,7 +1033,7 @@
         # If there are no sufficient fields, the fields are added
         # with empty value:""
         else:
-            field_list = field_list + [""]*(6-len(field_list))
+            field_list = field_list + [u""]*(6-len(field_list))
         # control character are erased: end of line, tab, space
         # _____Fields_____
         _record_type = field_list[0]
@@ -1026,59 +1043,69 @@
         _lines = self.delete_control(field_list[4])
         _label = self.delete_control_space(field_list[5])
         # _____Codes_____
-        _code_list = _codes.split( "\\" )
+        _code_list = _codes.split(u"\\")
         # "#" and "##" characters at the end of the code are erased
         # invalid characters are also erased
         if len(_code_list) == 2:
             _parent_code = self.validateCode(_code_list[0]) 
-            if _parent_code == "":
+            if _parent_code == u"":
                 _parent_code = None
+            else:
+                _parent_code = _parent_code.encode("utf8")
             _child_code =  self.validateCode(_code_list[1])
         elif len(_code_list) == 1:
             _child_code =  self.validateCode(_code_list[0])
             _parent_code = None
         else:
             print utils.mapping(_("Invalid codes in $1 record, codes $2"),
-                  (_record_type, _codes))
+                  (_record_type.encode("utf8"), _codes.encode("utf8")))
             return
-        if _child_code == "":
+        if _child_code == u"":
             print utils.mapping(_("Empty child code in $1 record, codes: "\
-                  "$2"), (_record_type, _codes))
+                  "$2"), (_record_type.encode("utf8"), _codes.encode("utf8")))
             return
+        if _parent_code == None:
+            # Empty parent code: unstructured measurements.
+            pass
+
         # _____Path_____
-        # TODO: path=0, no-estructured measures
-        _path_list = _path.split( "\\" )
+        _path_list = _path.split( u"\\" )
         if len(_path_list) > 0:
-            while _path_list[-1] == "":
+            while len(_path_list) > 0 and _path_list[-1] == u"":
                 _path_list = _path_list[:-1]
-            _path = _path_list[-1]
+            if len(_path_list) == 0:
+                # Empty path: unstructured measurements. Path is fixed to -2.
+                _path = -2
+            else:
+                _path = _path_list[-1]
             try:
                 _path = int(_path)
             except ValueError:
                 print utils.mapping(_("Invalid path in $1 record, "\
-                      "codes $2"), (_record_type, _codes))
+                      "codes $2"), (_record_type.encode("utf8"), _codes.encode("utf8")))
                 return
             if _path > 0:
                 _path -= 1
         else:
-            _path = 0
+            _path = -2
         # _____Total_____
         try:
             _total = float(_total)
         except ValueError:
             print utils.mapping(_("Invalid Total Measure value in $1 "\
-                  "record, codes $2"), (_record_type, _codes))
-            return
+                  "record, codes $2. Total fixed to 0."),
+                  (_record_type.encode("utf8"), _codes.encode("utf8")))
+            _total = 0
         # _____Measure lines_____
-        _lines = _lines.split( "\\" )
+        _lines = _lines.split(u"\\")
         _line_index = 0
         _line_list = [ ]
         while _line_index < len(_lines)-6:
             _linetype = _lines[_line_index]
-            if _linetype == "":
+            if _linetype == u"":
                 _linetype = 0
-            elif _linetype == "1" or _linetype == "2" or \
-                   _linetype == "3":
+            elif _linetype == u"1" or _linetype == u"2" or \
+                   _linetype == u"3":
                     _linetype = int(_linetype)
             else:
                 _linetype = 0
@@ -1088,31 +1115,38 @@
                 if self.__pattern["formula"].match(_comment):
                     print utils.mapping(_("The comment is not a formula or "\
                           "its have invalid characters, in the $1 record, "\
-                          "codes $2"), (_record_type, _codes))
+                          "codes $2"), (_record_type.encode("utf8"), _codes.encode("utf8")))
                     return
                 else:
-                    _formula = _comment
+                    _formula = _comment.encode("utf8")
                     _comment = ""
             else:
                 _formula = ""
+                _comment = _comment.encode("utf8")
             _units = _lines[_line_index + 2]
+            _units = self.__pattern["no_float"].sub(u"", _units)
             _length = _lines[_line_index + 3]
+            _length = self.__pattern["no_float"].sub(u"", _length)
             _width = _lines[_line_index + 4]
+            _width  = self.__pattern["no_float"].sub(u"", _width)
             _height = _lines[_line_index + 5]
+            _height  = self.__pattern["no_float"].sub(u"", _height)
+
             try:
-                if _units != "": _units = float(_units)
-                if _length != "": _length = float(_length)
-                if _width != "": _width = float(_width)
-                if _height != "": _height = float(_height)
+                if _units != u"":
+                    _units = float(_units)
+                if _length != u"": _length = float(_length)
+                if _width != u"": _width = float(_width)
+                if _height != u"": _height = float(_height)
             except ValueError:
-                print utils.mapping("The measure values are not float "\
-                      "numbers, code $1", (_codes,))
+                print utils.mapping(_("The measure values are not float "\
+                      "numbers, code $1"), (_codes.encode("utf8"),))
                 return
             _line_list.append([_linetype, _comment, _units,
                                _length, _width, _height, _formula])
             _line_index = _line_index + 6
-        self.__budget.setTree(_parent_code, _child_code, _path, "", "",
-                           _total, _line_list, _label, _record_type)
+        self.__budget.setTree(_parent_code, _child_code.encode("utf8"), _path, "", "",
+                           _total, _line_list, _label.encode("utf8"), _record_type.encode("utf8"))
         self.num_valid_record = self.num_valid_record + 1
 
     def _parseW(self, field_list):
@@ -1134,9 +1168,9 @@
         # _____Fields_____
         _code_fields = field_list[0]
         # last \ is erased
-        if len(_code_fields) and _code_fields[-1] == "\\":
+        if len(_code_fields) and _code_fields[-1] == u"\\":
             _code_fields = _code_fields[:-1]
-        _code_fields = _code_fields.split("\\")
+        _code_fields = _code_fields.split(u"\\")
         _field_dict = {}
         _field_index = 0
         while _field_index < len(_code_fields)-1:
@@ -1148,8 +1182,8 @@
             #"control": "[\t \n\r]"
             _field_code = self.delete_control_space(_field_code)
             # _____section_title_____
-            if _field_code != "":
-                _field_dict[_field_code] = _field_title
+            if _field_code != u"":
+                _field_dict[_field_code.encode("utf8")] = _field_title.encode("utf8")
             _field_index = _field_index + 2
         self.__budget.setSheetFields(_field_dict)
         self.num_valid_record = self.num_valid_record +1
@@ -1173,7 +1207,7 @@
         if len(field_list) < 3:
             return
         _code = field_list[1]
-        if _code == "":
+        if _code == u"":
             # A: Section Titles
             # Any INFORMATION after last field separator is ignored
             # The record must have 3 fields
@@ -1183,9 +1217,9 @@
             # _____Fields_____
             _section_codes = field_list[1]
             # last \ is erased
-            if len(_section_codes) and _section_codes[-1] == "\\":
+            if len(_section_codes) and _section_codes[-1] == u"\\":
                 _section_codes = _section_codes[:-1]
-            _section_codes = _section_codes.split("\\")
+            _section_codes = _section_codes.split(u"\\")
             _section_dict = {}
             _section_index = 0
             while _section_index < len(_section_codes)-1:
@@ -1198,8 +1232,8 @@
                 _section_code = self.delete_control_space(_section_code)
                 # _____section_title_____
                 _section_title = self.delete_control_space(_section_title)
-                if _section_code != "":
-                    _section_dict[_section_code] = _section_title
+                if _section_code != u"":
+                    _section_dict[_section_code.encode("utf8")] = _section_title.encode("utf8")
                 _section_index = _section_index + 2
             self.__budget.setSheetSections(_section_dict)
             self.num_valid_record = self.num_valid_record +1
@@ -1217,15 +1251,15 @@
             # invalid characters are also erased
             _record_code = self.validateCode(_record_code)
             _scodes_text = field_list[1]
-            if _scodes_text == "":
+            if _scodes_text == u"":
                 # TODO: rtf and html files
-                print "Html and rtf files not implemented in ~L record"
+                print "Html and rtf files not yet implemented in ~L record"
             else:
                 # _____Section-code_Section-text_____
                 # last \ is erased
-                if len(_scodes_text) and _scodes_text[-1] == "\\":
+                if len(_scodes_text) and _scodes_text[-1] == u"\\":
                     _scodes_text = _scodes_text[:-1]
-                _scodes_text = _scodes_text.split("\\")
+                _scodes_text = _scodes_text.split(u"\\")
                 _paragraph_dict = {}
                 _section_dict = {}
                 _section_index = 0
@@ -1237,14 +1271,14 @@
                     # _____section_code_____
                     _section_code = self.delete_control_space(_section_code)
                     # _____section_text_____
-                    if _section_code != "" and _section_text != "":
+                    if _section_code != u"" and _section_text != u"":
                         #-# paragraph #-#
-                        _paragraph_code = _record_code + _section_code + "*"
-                        _paragraph_dict[ _paragraph_code ] = _section_text
-                        _section_dict[_section_code] = _paragraph_code
+                        _paragraph_code = _record_code + _section_code + u"*"
+                        _paragraph_dict[ _paragraph_code.encode("utf8") ] = _section_text.encode("utf8")
+                        _section_dict[_section_code.encode("utf8")] = _paragraph_code.encode("utf8")
                     _section_index = _section_index + 2
                 self.__budget.setSheetParagraphs(_paragraph_dict)
-                self.__budget.setSheetRecord(_record_code, "*", _section_dict)
+                self.__budget.setSheetRecord(_record_code.encode("utf8"), "*", _section_dict)
                 self.num_valid_record = self.num_valid_record +1
     
     def _parseQ(self, field_list):
@@ -1273,9 +1307,9 @@
         _record_code = self.validateCode(_record_code)
         _scodes_pkey = field_list[1]
         # last \ is erased
-        if len(_scodes_pkey) and _scodes_pkey[-1] == "\\":
+        if len(_scodes_pkey) and _scodes_pkey[-1] == u"\\":
             _scodes_pkey = _scodes_pkey[:-1]
-        _scodes_pkey = _scodes_pkey.split("\\")
+        _scodes_pkey = _scodes_pkey.split(u"\\")
         _field_dict = {}
         _section_index = 0
         while _section_index < len(_scodes_pkey) -1:
@@ -1291,21 +1325,21 @@
             # _____Fields keys_____
             _field_keys = self.delete_control_space(_field_keys)
             # last ; is erased
-            if len(_field_keys) and _field_keys[-1] == ";":
+            if len(_field_keys) and _field_keys[-1] == u";":
                 _field_keys = _field_keys[:-1]
-            _field_keys_list = _scodes_pkey.split(";")
+            _field_keys_list = _scodes_pkey.split(u";")
             for _field_key in _field_keys_list:
-                if _field_key != "" and _section_code != "" and \
-                   _paragraph_key != "":
+                if _field_key != u"" and _section_code != u"" and \
+                   _paragraph_key != u"":
                     if _field_key in _field_dict:
                         _section_dict = _field_dict[_field_key]
                     else:
                         _section_dict = {}
                         _field_dict[_field_key] = _section_dict
-                    _section_dict[_section_code] = _paragraph_code
+                    _section_dict[_section_code.encode("utf8")] = _paragraph_code.encode("utf8")
             _section_index = _section_index + 3
         for _field, _section_dict in _field_dict.iteritems():
-            self.__budget.setSheetRecord(_record_code, _field, _section_dict)
+            self.__budget.setSheetRecord(_record_code.encode("utf8"), _field.encode("utf8"), _section_dict)
         self.num_valid_record = self.num_valid_record +1
     
     def _parseJ(self, field_list):
@@ -1332,11 +1366,11 @@
         _paragraph_code = self.delete_control_space(field_list[0])
         # _____Paragraph text_____
         _paragraph_text = field_list[1]
-        if _paragraph_text == "":
+        if _paragraph_text == u"":
             # TODO: rtf and html files
-            print "Html and rtf files not implemented in ~J record"
+            print "Html and rtf files not yet implemented in ~J record"
         else:
-            self.__budget.setSheetParagraph(paragraph_code, paragraph_text)
+            self.__budget.setSheetParagraph(paragraph_code.encode("utf8"), paragraph_text.encode("utf8"))
             self.num_valid_record = self.num_valid_record +1
     
     def _parseG(self, field_list):
@@ -1366,17 +1400,18 @@
         _grafic_files = self.delete_control(field_list[1])
         # _____subfields_____
         # last \ is erased
-        if len(_grafic_files) and _grafic_files[-1] == "\\":
+        if len(_grafic_files) and _grafic_files[-1] == u"\\":
             _grafic_files = _grafic_files[:-1]
-        _grafic_file_list = _grafic_files.split("\\")
+        _grafic_file_list = _grafic_files.split(u"\\")
         _tested_grafic_file_list = []
         for _grafic_file in _grafic_file_list:
+            _str_grafic_file = _grafic_file.encode("utf8")
             _path = os.path.dirname(self.__filename)
-            _grafic_file_path = os.path.join(_path, _grafic_file)
+            _grafic_file_path = os.path.join(_path, _str_grafic_file)
             if os.path.exists(_grafic_file_path):
                 _tested_grafic_file_list.append(_grafic_file_path)
             else:
-                _name_ext = os.path.splitext(_grafic_file)
+                _name_ext = os.path.splitext(_str_grafic_file)
                 _grafic_file_name = _name_ext[0]
                 _grafic_file_ext = _name_ext[1]
                 _grafic_file_name_u = _grafic_file_name.upper()
@@ -1404,7 +1439,7 @@
                         (_grafic_file_path,))
         if len(_grafic_file_list) > 0:
             for _grafic_file in _tested_grafic_file_list:
-                self.__budget.addFile(_record_code, _grafic_file, "img", "")
+                self.__budget.addFile(_record_code.encode("utf8"), _grafic_file, "img", "")
             self.num_valid_record = self.num_valid_record +1
     
     def _parseE(self, field_list):
@@ -1429,11 +1464,11 @@
         # If there are no sufficient fields, the fields are added
         # with empty value:""
         else:
-            field_list = field_list[1:] + [""]*(6-len(field_list))
+            field_list = field_list[1:] + [u""]*(6-len(field_list))
         # _____Fields_____
         # _____company Code_____
         _company_code = self.delete_control_space(field_list[0])
-        if _company_code == "":
+        if _company_code == u"":
             return
         # _____Summary_____
 
@@ -1444,15 +1479,15 @@
         _local_offices = self.delete_control(field_list[3])
         # _____subfields of local_offices_____
         # last \ is erased
-        if len(_local_offices) and _local_offices[-1] == "\\":
+        if len(_local_offices) and _local_offices[-1] == u"\\":
             _local_offices = _local_offices[:-1]
-        _local_offices_list = _local_offices.split("\\")
+        _local_offices_list = _local_offices.split(u"\\")
         # If there are no sufficent subfields, the subfields are added 
         # whith empty value
         _nsub = len(_local_offices_list) % 10
         if _nsub != 0:
             _local_offices_list = _local_offices_list + \
-                                   [""]*(10-len(field_list))
+                                   [u""]*(10-len(field_list))
         _local_offices = []
         _local_offices_index = 0
         while _local_offices_index < len(_local_offices_list)-9:
@@ -1466,39 +1501,44 @@
             _country = _local_offices_list[_local_offices_index+6]
             _phone = _local_offices_list[_local_offices_index+7]
             # last ; is erased
-            if len(_phone) and _phone[-1] == ";":
+            if len(_phone) and _phone[-1] == u";":
                 _phone = _phone[:-1]
-            _phone_list = _phone.split(";")
+            _phone_list = _phone.split(u";")
+            _phone_list = [_phone.encode("utf8") for _phone in _phone_list]
             _fax = _local_offices_list[_local_offices_index+8]
             # last ; is erased
-            if len(_fax) and _fax[-1] == ";":
+            if len(_fax) and _fax[-1] == u";":
                 _fax = _fax[:-1]
-            _fax_list = _fax.split(";")
+            _fax_list = _fax.split(u";")
+            _fax_list = [_fax.encode("utf8") for _fax in _fax_list]
             _contact_person = _local_offices_list[_local_offices_index+9]
-            if _type != "" or _subname != "" or _address != "" or \
-               _postal_code != "" or _town != "" or _province != "" or \
-               _country != "" or _phone != "" or _fax != "" or \
-               _contact_person != "":
-                _local_offices.append([_type, _subname, _address,
-                                       _postal_code, _town, _province,
-                                       _country, _phone_list, _fax_list,
-                                       _contact_person])
+            if _type != u"" or _subname != u"" or _address != u"" or \
+               _postal_code != u"" or _town != u"" or _province != u"" or \
+               _country != u"" or _phone != u"" or _fax != u"" or \
+               _contact_person != u"":
+                _local_offices.append([_type.encode("utf8"), _subname.encode("utf8"),
+                                       _address.encode("utf8"), _postal_code.encode("utf8"),
+                                       _town.encode("utf8"), _province.encode("utf8"),
+                                       _country.encode("utf8"), _phone_list,
+                                       _fax_list, _contact_person.encode("utf8")])
             _local_offices_index = _local_offices_index + 10
         # _____cif web email_____
         _c_w_e = self.delete_control_space(field_list[4])
         # last \ is erased
-        if len(_c_w_e) and _c_w_e[-1] == "\\":
+        if len(_c_w_e) and _c_w_e[-1] == u"\\":
             _c_w_e = _c_w_e[:-1]
-        _c_w_e_list = _c_w_e.split("\\")
+        _c_w_e_list = _c_w_e.split(u"\\")
         # _____subfields_____
         # If there are no sufficient fields, the fields are added
         # with empty value:""
-        _c_w_e_list = _c_w_e_list + [""]*(3-len(_c_w_e_list))
+        _c_w_e_list = _c_w_e_list + [u""]*(3-len(_c_w_e_list))
         _cif = _c_w_e_list[0]
         _web = _c_w_e_list[1]
         _email = _c_w_e_list[2]
-        self.__budget.setCompany(_company_code, _sumamary, _name, 
-                           _local_offices, _cif, _web, _email)
+        self.__budget.setCompany(_company_code.encode("utf8"),
+                    _sumamary.encode("utf8"), _name.encode("utf8"), 
+                    _local_offices, _cif.encode("utf8"),
+                    _web.encode("utf8"), _email.encode("utf8"))
         self.num_valid_record = self.num_valid_record +1
     
     def _parseX(self, field_list):
@@ -1527,33 +1567,33 @@
         # "control": "[\t \n\r]"
         _field_1 = self.delete_control_space(field_list[0])
         _field_2 = self.delete_control_space(field_list[1])
-        if _field_1 == "":
+        if _field_1 == u"":
             # A)
-            _field_2_list = _field_2.split("\\")
+            _field_2_list = _field_2.split(u"\\")
             _ti_index = 0
             while _ti_index < len(_field_2_list)-3:
                 _ti_code = _field_2_list[_ti_index]
                 _ti_description = _field_2_list[_ti_index+1]
                 _ti_unit = _field_2_list[_ti_index+2]
                 if _ti_code != "":
-                    self.__budget.addTecInfo(_ti_code, _ti_description,
-                                             _ti_unit)
+                    self.__budget.addTecInfo(_ti_code.encode("utf8"), _ti_description.encode("utf8"),
+                                             _ti_unit.encode("utf8"))
                 _ti_index = _ti_index + 3
         else:
             # B)
             # "#" and "##" characters at the end of the code are erased
             # invalid characters are also erased
             _record_code = self.validateCode(_field_1)
-            _field_2_list = _field_2.split("\\")
+            _field_2_list = _field_2.split(u"\\")
             _ti_index = 0
             _ti_dict = {}
             while _ti_index < len(_field_2_list)-2:
                 _ti_code = _field_2_list[_ti_index]
                 _ti_value = _field_2_list[_ti_index+1]
-                if _ti_code != "" and _ty_value != "":
-                    _ti_dict[_ti_code] = _ty_value
+                if _ti_code != u"" and _ty_value != u"":
+                    _ti_dict[_ti_code.encode("utf8")] = _ty_value.encode("utf8")
                 _ti_index = _ti_index + 2
-            self.__budget.setTecnicalInformation(_record_code, _ti_dict)
+            self.__budget.setTecnicalInformation(_record_code.encode("utf8"), _ti_dict)
         self.num_valid_record = self.num_valid_record +1
 
     def _parseF(self, field_list):
@@ -1584,16 +1624,16 @@
         _files = self.delete_control(field_list[1])
         # _____subfields_____
         # last \ is erased
-        if len(_files) and _files[-1] == "\\":
+        if len(_files) and _files[-1] == u"\\":
             _files = _files[:-1]
-        _files_list = _files.split("\\")
+        _files_list = _files.split(u"\\")
         # adding empty subfiels if necesary
         if len(_files_list)%3 > 0:
-            _files_list.extend[""]*(3 - len(_files_list)%3)
+            _files_list.extend[u""]*(3 - len(_files_list)%3)
         _file_index = 0
         _tested_files_list = []
         while _file_index < len(_files_list)-3:
-            _type = _files_list[_file_index].replace(" ","")
+            _type = _files_list[_file_index].replace(u" ",u"")
 ##            _types = {
 ##                "0": _("others"),
 ##                "1": _("características técnicas y de fabricación"),
@@ -1609,22 +1649,22 @@
 ##                        "empresa"),
 ##                "11": _("certificado/s de empresa"),
 ##                "12": _("obras realizadas")}
-            _types = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
-                      "11", "12"]
+            _types = [u"0", u"1", u"2", u"3", u"4", u"5", u"6", u"7", u"8", u"9", u"10",
+                      u"11", u"12"]
             if not _type in _types:
-                _type = "0"
+                _type = u"0"
             _filenames = _files_list[_file_index + 1]
             _description = _files_list[_file_index + 2]
             _file_index += 3
-            if len(_filenames) and _filenames[-1] == ";":
+            if len(_filenames) and _filenames[-1] == u";":
                 _files = _files[:-1]
-            _filenames_list = _files.split(";")
+            _filenames_list = _files.split(u";")
             _path = os.path.dirname(self.__filename)
             for _filename in filenames_list:
-                _file_path = os.path.join(_path, _filename)
+                _file_path = os.path.join(_path, _filename.encode("utf8"))
                 if os.path.exists(_file_path):
-                    _tested_files_list.append([_file_path, _type,
-                                               _description])
+                    _tested_files_list.append([_file_path, _type.encode("utf8"),
+                                               _description.encode("utf8")])
                 else:
                     _name_ext = os.path.splitext(_filename)
                     _file_name = _name_ext[0]
@@ -1642,23 +1682,23 @@
                     _file_path_lu = os.path.join(_path, _lu)
                     _file_path_ll = os.path.join(_path, _ll)
                     if os.path.exists(_file_path_uu):
-                        _tested_files_list.append([_file_path_uu, _type,
-                                                   _description])
+                        _tested_files_list.append([_file_path_uu, _type.encode("utf8"),
+                                                   _description.encode("utf8")])
                     elif os.path.exists(_grafic_file_path_ul):
-                        _tested_files_list.append([_file_path_ul, _type,
-                                                   _description])
+                        _tested_files_list.append([_file_path_ul, _type.encode("utf8"),
+                                                   _description.encode("utf8")])
                     elif os.path.exists(_grafic_file_path_lu):
-                        _tested_files_list.append([_file_path_lu, _type,
-                                                   _description])
+                        _tested_files_list.append([_file_path_lu, _type.encode("utf8"),
+                                                   _description.encode("utf8")])
                     elif os.path.exists(_grafic_file_path_ll):
-                        _tested_files_list.append([_file_path_ll, _type,
-                                                   _description])
+                        _tested_files_list.append([_file_path_ll, _type.encode("utf8"),
+                                                   _description.encode("utf8")])
                     else:
                         print utils.mapping(_("The file $1 do not exist"),
                             (_file_path,))
         if len(_tested_files_list) > 0:
             for _file in _tested_file_list:
-                self.__budget.addFile(_record_code, _file[0], file[1], file[2])
+                self.__budget.addFile(_record_code.encode("utf8"), _file[0], file[1], file[2])
         self.num_valid_record = self.num_valid_record +1
 
     def _parseB(self, field_list):
@@ -1718,13 +1758,13 @@
         # _____Labels_____
         # last \ is erased
         # TODO: change the others parsers to this:
-        while len(_labels) > 0 and _labels[-1] == "\\":
+        while len(_labels) > 0 and _labels[-1] == u"\\":
             _labels = _labels[:-1]
         # replace "_" to " "
-        _labels = _labels.replace("_"," ")
-        _label_list = _labels.split("\\")
+        _labels = _labels.replace(u"_",u" ")
+        _label_list = _labels.split(u"\\")
         for _label in _label_list:
-            self.__budget.addLabel(_code, _label)
+            self.__budget.addLabel(_code.encode("utf8"), _label.encode("utf8"))
         self.num_valid_record = self.num_valid_record + 1
 
     def _parseP(self, field_list):
@@ -1745,13 +1785,13 @@
         if len(field_list) > 2:
             # delete control caracters and spaces
             _family_code = self.delete_control_space(field_list[1])
-            if _family_code == "": # A)Global paremetric record
+            if _family_code == u"": # A)Global paremetric record
                 # The record must have 3 or 4 fields
                 if len(field_list) > 4:
                     field_list = field_list[0:4]
                 field_list = field_list[1:]
                 if len(field_list) == 2:
-                    field_list.append("")
+                    field_list.append(u"")
                 if len(field_list) != 3:
                     return
             else: # B)Family Parametric record
@@ -1766,16 +1806,16 @@
             return
         # _____Description_____
         _description = field_list[1]
-        if _description == "":
+        if _description == u"":
             print _("PyArq hates parametric DLLs")
             return
         # Adding last end of line
-        _description = _description + "\r\n"
+        _description = _description + u"\r\n"
         # Delete comments
         # "comment" : "#.*\r\n"
-        _description = self.__pattern["comment"].sub("\r\n",_description)
+        _description = self.__pattern["comment"].sub(u"\r\n",_description)
         # Tabs to spaces
-        _description = _description.replace("\t"," ")
+        _description = _description.replace(u"\t",u" ")
         # Delete empty lines
         # "empty_line": r"(\r\n) *\r\n"
         while self.__pattern["empty_line"].search(_description):
@@ -1784,12 +1824,12 @@
         # Delete spaces before and after /
         # "space_before_backslash" : r"( )+\\"
         _description = self.__pattern["space_before_backslash"].sub(
-                        r"\\",_description)
+                        ur"\\",_description)
         # "space_after_backslash" : r"\\( )+"
         _description = self.__pattern["space_after_backslash"].sub(
-                        r"\\",_description)
+                        ur"\\",_description)
         # Join lines that start but not end with /
-        _description = "\r\n" + _description # add leading end of line
+        _description = u"\r\n" + _description # add leading end of line
         # "start_noend_backslash": "(\r\n\\\.*[^\\\])\r\n"
         while self.__pattern["start_noend_backslash"].search(_description):
             _description = self.__pattern["start_noend_backslash"].sub(
@@ -1805,8 +1845,8 @@
                             lambda x: x.groups()[0], _description)
         _description = _description[2:]  # remove leading end of line
         #_description = re.sub(r"\\( )+",r"\\",_description)
-        _lines = _description.split("\r\n")
-        _final_description = ""
+        _lines = _description.split(u"\r\n")
+        _final_description = u""
         _pass_line = 0
         for index in range(len(_lines)):
             _line = _lines[index]
@@ -1814,44 +1854,44 @@
             if len(_line) != 0: # Delete empty lines
                 if _pass_line > 0:
                     _pass_line = _pass_line -1
-                    _line = ""
+                    _line = u""
                 elif _line.isspace():
-                    _line = ""
-                elif  _line[0] != "\\":
+                    _line = u""
+                elif  _line[0] != u"\\":
                     # Delete spaces out "" delimiter
-                    _list = _line.split('"')
-                    _final_line = ""
+                    _list = _line.split(u'"')
+                    _final_line = u""
                     for index1 in range(len(_list)):
                         if index1 % 2 != 0:
-                            _parcial_line = '"' + _list[index1]
+                            _parcial_line = u'"' + _list[index1]
                         else:
-                            _parcial_line =  '"' + _list[index1].replace(" ","")
+                            _parcial_line =  u'"' + _list[index1].replace(u" ",u"")
                         _final_line = _final_line + _parcial_line
                     _line = _final_line[1:]
                     _lines[index] = _line
                     # parse data
-                    if len(_line) > 2 and _line[:2] == "::":
+                    if len(_line) > 2 and _line[:2] == u"::":
                         # Delete spaces out " delimiter
                         #print "__PRECIO__" + _line[2:]
                         pass
-                    elif len(_line) > 2 and _line[:2] == "%:":
+                    elif len(_line) > 2 and _line[:2] == u"%:":
                         # Delete spaces out " delimiter
                         #print "__%AUX__" + _line[2:]
                         pass
-                    elif len(_line) > 3 and _line[:2] == "%%:":
+                    elif len(_line) > 3 and _line[:2] == u"%%:":
                         # Delete spaces out " delimiter
                         #print "__%%AUX__" + _line[2:]
                         pass
                     elif self.__pattern["var"].search(_line):
                         # Delete spaces out " delimiter
                         #print "line =", _line
-                        while _line.count('"') % 2 == 1 and \
+                        while _line.count(u'"') % 2 == 1 and \
                               index + _pass_line + 1 < len(_lines) -1:
                             _line = _line + _lines[index + _pass_line + 1]
                             _pass_line = _pass_line + 1
                         _search = self.__pattern["var"].search(_line)
                         if _search is not None:
-                            _var = _search.groups()[0] + " = " + _search.groups()[1]
+                            _var = _search.groups()[0] + u" = " + _search.groups()[1]
                             #print "__VAR__" + str(_var)
                             pass
                         else:
@@ -1862,67 +1902,67 @@
                         #_patern = "(^[^:]*):(.*)$"
                         _search = self.__pattern["descomposition"].search(_line)
                         if _search is not None:
-                            _var = _search.groups()[0] + ":" + _search.groups()[1]
+                            _var = _search.groups()[0] + u":" + _search.groups()[1]
                             #print "__Descomposición__" + str(_var)
                             pass
                         else:
                             #print "no __Descomposición__", _line
                             pass
                     else:
-                        print "Parametric: code: " + _family_code
+                        print "Parametric: code: " + _family_code.encode("utf8")
                         print "******* Desconocido *** : " + _line
-                        if index-10 > 0: print "-11 :", _lines[index-11]
-                        if index-10 > 0: print "-10 :", _lines[index-10]
-                        if index-9 > 0: print "-9 :", _lines[index-9]
-                        if index-8 > 0: print "-8 :", _lines[index-8]
-                        if index-7 > 0: print "-7 :", _lines[index-7]
-                        if index-6 > 0: print "-6 :", _lines[index-6]
-                        if index-5 > 0: print "-5 :", _lines[index-5]
-                        if index-4 > 0: print "-4 :", _lines[index-4]
-                        if index-3 > 0: print "-3 :", _lines[index-3]
-                        if index-2 > 0: print "-2 :", _lines[index-2]
-                        if index-1 > 0: print "-1 :", _lines[index-1]
+                        if index-10 > 0: print "-11 :", _lines[index-11].encode("utf8")
+                        if index-10 > 0: print "-10 :", _lines[index-10].encode("utf8")
+                        if index-9 > 0: print "-9 :", _lines[index-9].encode("utf8")
+                        if index-8 > 0: print "-8 :", _lines[index-8].encode("utf8")
+                        if index-7 > 0: print "-7 :", _lines[index-7].encode("utf8")
+                        if index-6 > 0: print "-6 :", _lines[index-6].encode("utf8")
+                        if index-5 > 0: print "-5 :", _lines[index-5].encode("utf8")
+                        if index-4 > 0: print "-4 :", _lines[index-4].encode("utf8")
+                        if index-3 > 0: print "-3 :", _lines[index-3].encode("utf8")
+                        if index-2 > 0: print "-2 :", _lines[index-2].encode("utf8")
+                        if index-1 > 0: print "-1 :", _lines[index-1].encode("utf8")
                         print "-0 :", _lines[index-0]
                         pass
                 else:
-                    _parameter_list = _line.split("\\")[1:-1]
+                    _parameter_list = _line.split(u"\\")[1:-1]
                     if len(_parameter_list) >= 2:
-                        if _parameter_list[0] == "C" or \
-                           _parameter_list[0] == "COMENTARIO":
+                        if _parameter_list[0] == u"C" or \
+                           _parameter_list[0] == u"COMENTARIO":
                             #print "__COMENTARIO__" + _parameter_list[1]
                             self.__budget.setParametricSelectComment(
-                                _family_code, _parameter_list[1])
-                        elif _parameter_list[0] == "R" or \
-                           _parameter_list[0] == "RESUMEN":
+                                _family_code.encode("utf8"), _parameter_list[1].encode("utf8"))
+                        elif _parameter_list[0] == u"R" or \
+                           _parameter_list[0] == u"RESUMEN":
                             #print "__RESUMEN__" + _parameter_list[1]
-                            self.__budget.setParametricSummary(_family_code,
-                                _parameter_list[1])
-                        elif _parameter_list[0] == "T" or \
-                           _parameter_list[0] == "TEXTO":
+                            self.__budget.setParametricSummary(_family_code.encode("utf8"),
+                                _parameter_list[1].encode("utf8"))
+                        elif _parameter_list[0] == u"T" or \
+                           _parameter_list[0] == u"TEXTO":
                             #print "__TEXTO__" + _parameter_list[1]
-                            self.__budget.setParametricText(_family_code,
-                                _parameter_list[1])
-                        elif _parameter_list[0] == "P" or \
-                           _parameter_list[0] == "PLIEGO":
+                            self.__budget.setParametricText(_family_code.encode("utf8"),
+                                _parameter_list[1].encode("utf8"))
+                        elif _parameter_list[0] == u"P" or \
+                           _parameter_list[0] == u"PLIEGO":
                             #print "__PLIEGO__" + str(_parameter_list[1:])
                             pass
-                        elif _parameter_list[0] == "K" or \
-                           _parameter_list[0] == "CLAVES":
+                        elif _parameter_list[0] == u"K" or \
+                           _parameter_list[0] == u"CLAVES":
                             #print "__CLAVES__" + str(_parameter_list[1:])
                             pass
-                        elif _parameter_list[0] == "F" or \
-                           _parameter_list[0] == "COMERCIAL":
+                        elif _parameter_list[0] == u"F" or \
+                           _parameter_list[0] == u"COMERCIAL":
                             #print "__COMERCIAL__" + str(_parameter_list[1:])
                             pass
                         else:
                             #print "==PARAMETRO==" + str(_parameter_list[:])
                             pass
-                _final_description = _final_description + _line + "\r\n"
+                _final_description = _final_description + _line + u"\r\n"
                 
                 #print _line
         # Delete last empty line
         _description = _final_description[:-2]
-        _lines = _description.split("\r\n")
+        _lines = _description.split(u"\r\n")
         for _line in _lines:
             pass
             #print _line
@@ -1995,31 +2035,30 @@
                 # remove leading spaces
                 if _version in self.__character_sets_dict:
                     self.__character_set = self.__character_sets_dict[_version]
+                    print utils.mapping(_("FIEBDC character encoding: $1"),(self.__character_set,))
                 else:
-                    print utils.mapping(_("This codepage do not exist in "\
-                         "FIEBDC3! Default codepage: $1"),
+                    print utils.mapping(_("This Character encoding do not exist in "\
+                         "FIEBDC3! Default Character encoding: $1"),
                          (self.__character_set,))
             else:
-                print utils.mapping(_("This V record dot have a codepage! "\
-                         "Default codepage: $1"),
+                print utils.mapping(_("This V record dot have a character encoding! "\
+                         "Default character encoding: $1"),
                          (self.__character_set,))
         else:
-            print utils.mapping(_("Not 'V' record in File! Default codepage: "\
+            print utils.mapping(_("Not 'V' record in File! Default character encoding: "\
                   "$1"), (self.__character_set,))
-        if self.__character_set != "utf8":
-            _buffer = unicode(_buffer, self.__character_set)
-            _buffer = _buffer.encode("utf8")
+        _buffer = unicode(_buffer, self.__character_set)
         # Any INFORMATION between the beginning of the file and the
         # beginning of the first registry “~” is ignored
         #"after_first_tilde" : "^[^~]*~"
         _buffer = self.__pattern["after_first_tilde"].sub("",_buffer)
-        while _buffer != "" and not self.__cancel:
+        while _buffer != u"" and not self.__cancel:
             #-# the blank characters (32), tabs (9) and end of line (13 and 10)
             # before the separators '~', '|' are erased.
             # Before separator \ not deleted because it affects the reading of
             # the record ~P
             _buffer = self.eraseControlCharacters(_buffer)
-            _record_list = _buffer.split("~")
+            _record_list = _buffer.split(u"~")
             # The last record can be incomplete unless it is the last one of
             # the file
             if len(_record_list) > 1:
@@ -2030,18 +2069,16 @@
                 # The blank characters (32), tabs (9) and end of line
                 # (13 and 10) at the end of the file are ignored.
                 #"end_control" : "((\r\n)| |\t)+$"
-                _record_list[-1] = self.__pattern["end_control"].sub("",
+                _record_list[-1] = self.__pattern["end_control"].sub(u"",
                                            _record_list[-1])
-                _last_record = ""
+                _last_record = u""
             for record in _record_list:
                 if self.__cancel:
                     break
                 self.parseRecord(record)
             interface.progress(_file.tell() / _filesize)
             _buffer2 = _file.read(100000)
-            if self.__character_set != "utf8":
-                _buffer2 = unicode(_buffer2, self.__character_set)
-                _buffer2 = _buffer2.encode("utf8")
+            _buffer2 = unicode(_buffer2, self.__character_set)
             _buffer = _last_record + _buffer2
         _file.close()
         if self.__cancel:
@@ -2115,11 +2152,11 @@
 
     def delete_control_space(self, text):
         text = self.delete_control(text)
-        text = text.replace(" ", "")
+        text = text.replace(u" ", u"")
         return text
 
     def delete_control(self, text):
-        text = text.replace("\t", "")
-        text = text.replace("\r", "")
-        text = text.replace("\n", "")
+        text = text.replace(u"\t", u"")
+        text = text.replace(u"\r", u"")
+        text = text.replace(u"\n", u"")
         return text
author	Miguel Ángel Bárcena Rodríguez <miguelangel@obraencurso.es>
date	Sat, 02 Nov 2013 19:26:09 +0100
parents	0359329a1c26
children	878159a13494