Mercurial > pyarq-presupuestos
diff Generic/fiebdc.py @ 17:a7b9f7e7dfa4
Improvements importing FIEBDC files
author | Miguel Ángel Bárcena Rodríguez <miguelangel@obraencurso.es> |
---|---|
date | Sat, 02 Nov 2013 19:26:09 +0100 |
parents | 0359329a1c26 |
children | 878159a13494 |
line wrap: on
line diff
--- a/Generic/fiebdc.py Fri Apr 19 00:04:20 2013 +0200 +++ b/Generic/fiebdc.py Sat Nov 02 19:26:09 2013 +0100 @@ -3,7 +3,7 @@ ## File fiebdc.py ## This file is part of pyArq-Presupuestos. ## -## Copyright (C) 2010 Miguel Ángel Bárcena Rodríguez +## Copyright (C) 2010-2013 Miguel Ángel Bárcena Rodríguez ## <miguelangel@obraencurso.es> ## ## pyArq-Presupuestos is free software: you can redistribute it and/or modify @@ -26,7 +26,8 @@ import re import calendar import os.path - +import unicodedata +import hashlib # pyArq-Presupuestos modules import base from Generic import utils @@ -97,24 +98,24 @@ self.__generator = globalVars.version self.__character_set = "850" self.__pattern = { - "control_tilde" : re.compile("((\r\n)| |\t)+~"), - "control_vbar" : re.compile("((\r\n)| |\t)+\|"), - "control_backslash" : re.compile(r"((\r\n)| |\t)+\\"), - "valid_code" : re.compile("[^A-Za-z0-9ñÑ.$#%&_]"), - "special_char": re.compile("[#%&]"), - "no_float": re.compile("[^0-9.]"), - "formula" : re.compile(".*[^0123456789\.()\+\-\*/\^abcdp ].*"), - "comment": re.compile("#.*\r\n"), - "empty_line": re.compile(r"(\r\n) *\r\n"), - "space_before_backslash" : re.compile(r"( )+\\"), - "space_after_backslash" : re.compile(r"\\( )+"), - "start_noend_backslash" : re.compile("(\r\n\\\.*[^\\\])\r\n"), - "end_oper": re.compile("(\+|-|\*|/|/^|@|&|<|>|<=|>=|=|!) *\r\n"), - "matricial_var" : re.compile("(\r\n *[%|\$][A-ZÑ].*=.*,) *\r\n"), - "descomposition" : re.compile("^([^:]+):(.*)$"), - "var" : re.compile("^([$%][A-ZÑ][()0-9, ]*)=(.*)$"), - "after_first_tilde" : re.compile("^[^~]*~"), - "end_control" : re.compile("((\r\n)| |\t)+$"), + "control_tilde" : re.compile(u"((\r\n)| |\t)+~"), + "control_vbar" : re.compile(u"((\r\n)| |\t)+\|"), + "control_backslash" : re.compile(ur"((\r\n)| |\t)+\\"), + "valid_code" : re.compile(u"[^A-Za-z0-9ñÑ.$#%&_]"), + "special_char": re.compile(u"[#%&]"), + "no_float": re.compile(u"[^0-9.]"), + "formula" : re.compile(u".*[^0123456789\.()\+\-\*/\^abcdp ].*"), + "comment": re.compile(u"#.*\r\n"), + "empty_line": re.compile(ur"(\r\n) *\r\n"), + "space_before_backslash" : re.compile(ur"( )+\\"), + "space_after_backslash" : re.compile(ur"\\( )+"), + "start_noend_backslash" : re.compile(u"(\r\n\\\.*[^\\\])\r\n"), + "end_oper": re.compile(u"(\+|-|\*|/|/^|@|&|<|>|<=|>=|=|!) *\r\n"), + "matricial_var" : re.compile(u"(\r\n *[%|\$][A-ZÑ].*=.*,) *\r\n"), + "descomposition" : re.compile(u"^([^:]+):(.*)$"), + "var" : re.compile(u"^([$%][A-ZÑ][()0-9, ]*)=(.*)$"), + "after_first_tilde" : re.compile(u"^[^~]*~"), + "end_control" : re.compile(u"((\r\n)| |\t)+$"), } def cancel(self): @@ -134,9 +135,9 @@ record ~P """ # "control_tilde" : "((\r\n)| |\t)+~" - string = self.__pattern["control_tilde"].sub("~",string) + string = self.__pattern["control_tilde"].sub(u"~",string) # "control_vbar" : "((\r\n)| |\t)+\|" - string = self.__pattern["control_vbar"].sub("|",string) + string = self.__pattern["control_vbar"].sub(u"|",string) # "control_backslash" : r"((\r\n)| |\t)+\\" #string = self.__pattern["control_backslash"].sub(r"\\",string) return string @@ -147,31 +148,44 @@ Test if the code have invalid characters and try to erase it, if it is posible return a valid code else return a empty string. """ - if not isinstance(code, str): - print _("Invalid code, it must be a string") - return "" - # Valid chararcter: A-Z a-z 0-9 ñ Ñ . $ # % & _ + if not isinstance(code, unicode): + print _("Invalid code, it must be a unicode string") + return u"" + # Valid chararcter: A-Z a-z 0-9 ñ Ñ . $ # % & _ # "valid_code" : "[^A-Za-z0-9ñÑ.$#%&_]" - _code = self.__pattern["valid_code"].sub("", code) - if _code != code: - print utils.mapping(_("The code '$1' have invalid characters."), - (code,)) - code = _code + _ucode = self.__pattern["valid_code"].sub(u"", code) + if _ucode != code: + try: + print utils.mapping(_("The code '$1' have invalid characters."), + (code.encode("utf8"),)) + except: + print utils.mapping(_("The code '$1' have invalid characters and can not be encoded in utf8."), (code,)) + + if len(_ucode) == 0: + _normalize_code = ''.join((c for c in unicodedata.normalize('NFD', _ucode) if unicodedata.category(c) != 'Mn')) + # from http://www.leccionespracticas.com/uncategorized/eliminar-tildes-con-python-solucionado/ + _ucode = self.__pattern["valid_code"].sub(u"", _normalize_code) + if len(_ucode) == 0: + _hash_code = hashlib.sha256() + _hash_code.update(code.encode('utf-8')) + _hexdigest_code = _hash_code.hexdigest() + _ucode = self.__pattern["valid_code"].sub(u"", _hexdigest_code) + code = _ucode # the lasts characters can not be <#> or <##> # <##> -> root record in FIEFDC-3 # <#> -> chapter record in FIEFDC-3 if len(code) > 0: - while code[-1] == "#": + while code[-1] == u"#": code = code[:-1] if len(code) > 20: code = code[:20] # only one charecter # % or & - if sum([code.count(c) for c in '#%&']) > 1: + if sum([code.count(c) for c in u'#%&']) > 1: print utils.mapping(_("The code '$1' contains special "\ - "characters repeated."),(code,)) - _i = min([code.find(c) for c in '#%&']) + "characters repeated."),(code.encode("utf8"),)) + _i = min([code.find(c) for c in u'#%&']) code = code[:_i+1] + \ - self.__pattern["special_char"].sub("", code[_i+1:]) + self.__pattern["special_char"].sub(u"", code[_i+1:]) return code def parseDate(self, date): @@ -187,11 +201,11 @@ or None if the date format is invalid """ # All characters must be numbers, len <= 8 and not empty string - if not date.isdigit() or len(date) > 8 or date == "": + if not date.isdigit() or len(date) > 8 or date == u"": return None else: if len(date)%2 == 1: # uneven len: add a leading 0 - date = "0" + date + date = u"0" + date if len(date) == 8: _d = int(date[:2]) _m = int(date[2:4]) @@ -307,66 +321,66 @@ # TODO: ~P. Registro tipo Descripción Paramétrica. # TODO: ~O. Registro tipo Relación Comercial. # TODO: test records - _field_list = record.split("|") + _field_list = record.split(u"|") self._record_number = self._record_number +1 _budget = self.__budget - if _field_list[0] == "V": + if _field_list[0] == u"V": self._record_V_number += 1 self._parseV(_field_list) - elif _field_list[0] == "C": + elif _field_list[0] == u"C": self._record_C_number += 1 self._parseC(_field_list) - elif _field_list[0] == "D": + elif _field_list[0] == u"D": self._record_D_number += 1 self._parseDY(_field_list) - elif _field_list[0] == "Y": + elif _field_list[0] == u"Y": self._record_Y_number += 1 self._parseDY(_field_list) - elif _field_list[0] == "M": + elif _field_list[0] == u"M": self._record_M_number += 1 self._parseMN(_field_list) - elif _field_list[0] == "N": + elif _field_list[0] == u"N": self._record_N_number += 1 self._parseMN(_field_list) - elif _field_list[0] == "T": + elif _field_list[0] == u"T": self._record_T_number += 1 self._parseT(_field_list) - elif _field_list[0] == "K": + elif _field_list[0] == u"K": self._record_K_number += 1 self._parseK(_field_list) - elif _field_list[0] == "W": + elif _field_list[0] == u"W": self._record_W_number += 1 self._parseW(_field_list) - elif _field_list[0] == "L": + elif _field_list[0] == u"L": self._record_L_number += 1 self._parseL(_field_list) - elif _field_list[0] == "Q": + elif _field_list[0] == u"Q": self._record_Q_number += 1 self._parseQ(_field_list) - elif _field_list[0] == "J": + elif _field_list[0] == u"J": self._record_J_number += 1 self._parseJ(_field_list) - elif _field_list[0] == "G": + elif _field_list[0] == u"G": self._record_G_number += 1 self._parseG(_field_list) - elif _field_list[0] == "E": + elif _field_list[0] == u"E": self._record_E_number += 1 self._parseE(_field_list) elif _field_list[0] == "O": self._record_O_number += 1 - elif _field_list[0] == "P": + elif _field_list[0] == u"P": self._record_P_number += 1 self._parseP(_field_list) - elif _field_list[0] == "X": + elif _field_list[0] == u"X": self._record_X_number += 1 self._parseX(_field_list) - elif _field_list[0] == "B": + elif _field_list[0] == u"B": self._record_B_number += 1 self._parseB(_field_list) - elif _field_list[0] == "F": + elif _field_list[0] == u"F": self._record_F_number += 1 self._parseF(_field_list) - elif _field_list[0] == "A": + elif _field_list[0] == u"A": self._record_A_number += 1 self._parseA(_field_list) else: @@ -401,7 +415,7 @@ # If there are no sufficient fields, the fields are added # with empty value:"" else: - field_list = field_list + [""]*(10-len(field_list)) + field_list = field_list + [u""]*(10-len(field_list)) # control character are erased: end of line, tab, space # only leading and trailing whitespace in owner, generator, comment # _____Fields_____ @@ -414,42 +428,45 @@ _header_title = field_list[4].strip() _header_title = self.delete_control(_header_title) _character_set = self.delete_control_space(field_list[5]) - _comment = field_list[6].strip("\t \n\r") + _comment = field_list[6].strip(u"\t \n\r") _data_type = self.delete_control_space(field_list[7]) _number_certificate = self.delete_control_space(field_list[8]) __date_certificate = self.delete_control_space(field_list[9]) # _____Owner_____ self.__budget.setOwner(_owner) # _____Version-Date_____ - _version_date = _version_date.split("\\") + _version_date = _version_date.split(u"\\") _file_format = _version_date[0] if _file_format in self.__format_list: self.__file_format = _file_format - print _("FIEBDC format: %s" % _file_format) + print utils.mapping(_("FIEBDC format: $1"),(_file_format,)) + if len(_version_date) > 1: _date = _version_date[1] - if _date != "": + if _date != u"": _parsed_date = self.parseDate(_date) if _parsed_date is not None: self.__budget.setDate(_parsed_date) # _____Generator_____ # ignored field - print _("FIEBDC file generated by %s" % _generator) + print utils.mapping(_("FIEBDC file generated by $1"),(_generator,)) # _____Header_Title_____ - _header_title = _header_title.split("\\") + _header_title = _header_title.split(u"\\") _header_title = [_title.strip() for _title in _header_title] _header = _header_title.pop(0) + _header = [_item.encode("utf8") for _item in _header] _title = [ ] for _title_index in _header_title: - if _title_index != "": + if _title_index != u"": _title.append(_title_index) - if _header != "": - self.__budget.setTitleList([ _header, _title ]) + _title = [_item.encode("utf8") for _item in _title] + if _header != u"": + self.__budget.setTitleList([ _header, _title]) # _____Characters_set_____ # field parsed in readFile method # _____Comment_____ - if _comment != "": - self.__budget.setComment(_comment) + if _comment != u"": + self.__budget.setComment(_comment.encode("utf8")) # _____Data type_____ # 1 -> Base data. # 2 -> Budget. @@ -508,16 +525,16 @@ _field1 = self.delete_control_space(field_list[1]) _field2 = self.delete_control_space(field_list[2]) # _____Field 1_____ - if len(_field1) > 0 and _field1[-1] == "\\": + if len(_field1) > 0 and _field1[-1] == u"\\": _field1 = _field1[:-1] # if there are a \ character at the end it must be erased - _percentages = _field1.split("\\") + _percentages = _field1.split(u"\\") if len(_percentages) > 5: _percentages = _percentages[:5] # If there are no sufficient subfields, the subfields are added # with empty value:"" else: - _percentages = _percentages + [""]*(5-len(_percentages)) + _percentages = _percentages + [u""]*(5-len(_percentages)) _percentage_titles = [ "CI", "GG", "BI", "BAJA", "IVA" ] _percentage_dict = {} for _percentage_index in range(len(_percentages)): @@ -534,12 +551,12 @@ _title_num = len(self.__budget.getTitleList()[1]) if _title_num == 0: _title_num = 1 # If the field 2 is empty, the field 0 is readed - if _field2 == "": + if _field2 == u"": # _____Field 0_____ - if _field0[-1] == "\\": + if _field0[-1] == u"\\": _field0 = _field0[:-1] # if there are a \ character at the end it must be erased - _decimal_list = _field0.split("\\") + _decimal_list = _field0.split(u"\\") _decimal_index = 0 if len(_decimal_list)%9 != 0: # if it is not multiple of 9, empty subfield are added @@ -740,7 +757,7 @@ # If there are no sufficient fields, the fields are added # with empty value:"" else: - field_list = field_list + [""]*(7-len(field_list)) + field_list = field_list + [u""]*(7-len(field_list)) # control character are erased: en of line, tab, space # _____Fields_____ _record_type = field_list[0] @@ -751,13 +768,13 @@ _dates = self.delete_control_space(field_list[5]) _type = self.delete_control_space(field_list[6]) # _____Code_____ - _codes = _codes.split("\\") + _codes = _codes.split(u"\\") if len(_codes) > 0: # parse the hierarchy of the first code # hierarchy: 0->root, 1->Chapter/subchapter, 2->other - if len(_codes[0]) > 2 and _codes[0][-2:] == "##": + if len(_codes[0]) > 2 and _codes[0][-2:] == u"##": _hierarchy = 0 - elif len(_codes[0]) > 1 and _codes[0][-1:] == "#": + elif len(_codes[0]) > 1 and _codes[0][-1:] == u"#": _hierarchy = 1 else: _hierarchy = 2 @@ -766,12 +783,12 @@ # maximun len 20 characters _codes = [self.validateCode(_code) for _code in _codes] # empty codes are ignored - while "" in _codes: - _codes.remove("") + while u"" in _codes: + _codes.remove(u"") if len(_codes) > 0: #TODO: test this _code = _codes[0] - _synonyms = _codes + _synonyms = [synonym.encode("utf8") for synonym in _codes] else: print _("Record C without a valid code") return @@ -781,12 +798,12 @@ # nothing to do # _____Price_____ and _____Dates_____ # last \ is erased - if len(_dates) > 0 and _dates[-1] == "\\": + if len(_dates) > 0 and _dates[-1] == u"\\": _dates = _dates[:-1] - if len(_prices) > 0 and _prices[-1] == "\\": + if len(_prices) > 0 and _prices[-1] == u"\\": _prices = _prices[:-1] - _dates = _dates.split("\\") - _prices = _prices.split("\\") + _dates = _dates.split(u"\\") + _prices = _prices.split(u"\\") # number of prices = number of titles in "V" line # if there are no sufficient prices it takes the last price defined _title_num = len(self.__budget.getTitleList()[1]) @@ -842,59 +859,60 @@ # 2 -> None,Q,% # 3 -> None,MC,MCr,MM,MS,ME,MCu,Mal,ML,M if _hierarchy == 0: - if _type == "OB": + if _type == u"OB": _subtype = _type _type = 0 - elif _type == "0" or _type == "": - _subtype = "" + elif _type == u"0" or _type == u"": + _subtype = u"" _type = 0 else: print utils.mapping(_("Incorrect type ($1) in the code $2"), - (str(_type), _code)) + (_type.encode("utf8"), _code.encode("utf8"))) _type = 0 - _subtype = "" + _subtype = u"" elif _hierarchy == 1: - if _type == "PU": + if _type == u"PU": _subtype = _type _type = 0 - elif _type == "0" or _type == "": - _subtype = "" + elif _type == u"0" or _type == u"": + _subtype = u"" _type = 0 else: print utils.mapping(_("Incorrect type ($1) in the code $2"), - (str(_type), _code)) + (_type.encode("utf8"), _code.encode("utf8"))) _type = 0 - _subtype = "" + _subtype = u"" else: - if _type == "EA" or _type == "EU" or _type == "EC" or \ - _type == "EF" or _type == "PA": + if _type == u"EA" or _type == u"EU" or _type == u"EC" or \ + _type == u"EF" or _type == u"PA": _subtype = _type _type = 0 - elif _type == "H": + elif _type == u"H": _subtype = _type _type = 1 - elif _type == "Q" or _type == "%": + elif _type == u"Q" or _type == u"%": _subtype = _type _type = 2 - elif _type == "MC" or _type == "MCr" or _type == "MM" or \ - _type == "MS" or _type == "ME" or _type == "MCu" or \ - _type == "Mal" or _type == "ML" or _type == "M": + elif _type == u"MC" or _type == u"MCr" or _type == u"MM" or \ + _type == u"MS" or _type == u"ME" or _type == u"MCu" or \ + _type == u"Mal" or _type == u"ML" or _type == u"M": _subtype = _type _type = 3 - elif _type == "0" or _type == "1" or _type == "2" or \ - _type == "3": - _subtype = "" + elif _type == u"0" or _type == u"1" or _type == u"2" or \ + _type == u"3": + _subtype = u"" _type = int(_type) - elif _type == "": - _subtype = "" + elif _type == u"": + _subtype = u"" _type = 0 else: print utils.mapping(_("Incorrect type ($1) in the code $2"), - (str(_type), _code)) + (_type.encode("utf8"), _code.encode("utf8"))) _type = 0 - _subtype = "" - self.__budget.setRecord(_code, _synonyms, _hierarchy, - _unit, _summary, _prices, _dates, _type, _subtype) + _subtype = u"" + self.__budget.setRecord(_code.encode("utf8"), _synonyms, _hierarchy, + _unit.encode("utf8"), _summary.encode("utf8"), + _prices, _dates, _type, _subtype.encode("utf8")) self.num_valid_record = self.num_valid_record + 1 def _parseDY(self, field_list): @@ -913,7 +931,7 @@ # If there are no sufficient fields, the fields are added # with empty value:"" else: - field_list = field_list + [""]*(3-len(field_list)) + field_list = field_list + [u""]*(3-len(field_list)) # control character are erased: end of line, tab, space # _____Fields_____ _record_type = field_list[0] @@ -922,10 +940,10 @@ # _____Code_____ # "#" and "##" characters at the end of the code are erased # invalid characters are also erased - _code = self.validateCode(_code) + _code = self.validateCode(_code) # _____children_____ # TODO: test the number of decimals in factor an yield values - _children = _children.split( "\\" ) + _children = _children.split(u"\\") _children_list = [ ] _child_index = 0 while _child_index < len(_children)-3: @@ -936,7 +954,7 @@ # _____child_code_____ _child_code = self.validateCode(_child_code) # _____factor_____ - if _factor != "": + if _factor != u"": try: _factor = float(_factor) except ValueError: @@ -944,10 +962,10 @@ "descomposition of the record $1, the factor "\ "of the child $2 must be a float number and "\ "can not be $3, seted default value 1.0"), - (_code, _child_code, _factor)) + (_code.encode("utf8"), _child_code.encode("utf8"), _factor.encode("utf8"))) _factor = 1.0 #____yield___ - if _yield != "": + if _yield != u"": try: _yield = float(_yield) except ValueError: @@ -955,15 +973,15 @@ "descomposition of the record $1, the yield of "\ "the child $2, must be a float number and can"\ "not be $3, seted default value 1.0"), - (_code, _child_code, _factor)) + (_code.encode("utf8"), _child_code.encode("utf8"), _factor.encode("utf8"))) _yield = 1.0 - if _child_code != "" and _code != "": + if _child_code != u"" and _code != u"": _children_list.append([_child_code, _factor, _yield ]) - if _record_type == "D": + if _record_type == u"D": _position = _child_index / 3 else: #_record_type == "Y" _position = -1 - self.__budget.setTree(_code, _child_code, _position, _factor, + self.__budget.setTree(_code.encode("utf8"), _child_code.encode("utf8"), _position, _factor, _yield, "", "", "", "") _child_index = _child_index + 3 self.num_valid_record = self.num_valid_record +1 @@ -993,7 +1011,7 @@ # invalid characters are also erased _code = self.validateCode(_code) # _____Text_____ - self.__budget.setText(_code, _text) + self.__budget.setText(_code.encode("utf8"), _text.encode("utf8")) self.num_valid_record = self.num_valid_record + 1 def _parseMN(self, field_list): @@ -1007,7 +1025,6 @@ 4- {Type\Comment\Unit\Length\Width\Height\} 5- [Label] """ - # _____Number of fields_____ # Any INFORMATION after last field separator is ignored # The record must have 6 fields @@ -1016,7 +1033,7 @@ # If there are no sufficient fields, the fields are added # with empty value:"" else: - field_list = field_list + [""]*(6-len(field_list)) + field_list = field_list + [u""]*(6-len(field_list)) # control character are erased: end of line, tab, space # _____Fields_____ _record_type = field_list[0] @@ -1026,59 +1043,69 @@ _lines = self.delete_control(field_list[4]) _label = self.delete_control_space(field_list[5]) # _____Codes_____ - _code_list = _codes.split( "\\" ) + _code_list = _codes.split(u"\\") # "#" and "##" characters at the end of the code are erased # invalid characters are also erased if len(_code_list) == 2: _parent_code = self.validateCode(_code_list[0]) - if _parent_code == "": + if _parent_code == u"": _parent_code = None + else: + _parent_code = _parent_code.encode("utf8") _child_code = self.validateCode(_code_list[1]) elif len(_code_list) == 1: _child_code = self.validateCode(_code_list[0]) _parent_code = None else: print utils.mapping(_("Invalid codes in $1 record, codes $2"), - (_record_type, _codes)) + (_record_type.encode("utf8"), _codes.encode("utf8"))) return - if _child_code == "": + if _child_code == u"": print utils.mapping(_("Empty child code in $1 record, codes: "\ - "$2"), (_record_type, _codes)) + "$2"), (_record_type.encode("utf8"), _codes.encode("utf8"))) return + if _parent_code == None: + # Empty parent code. No-estructured measures. + pass + # _____Path_____ - # TODO: path=0, no-estructured measures - _path_list = _path.split( "\\" ) + _path_list = _path.split( u"\\" ) if len(_path_list) > 0: - while _path_list[-1] == "": + while len(_path_list) > 0 and _path_list[-1] == u"": _path_list = _path_list[:-1] - _path = _path_list[-1] + if len(_path_list) == 0: + # Empty path. No-estructured measures. Path fixed to -2 + _path = -2 + else: + _path = _path_list[-1] try: _path = int(_path) except ValueError: print utils.mapping(_("Invalid path in $1 record, "\ - "codes $2"), (_record_type, _codes)) + "codes $2"), (_record_type.encode("utf8"), _codes.encode("utf8"))) return if _path > 0: _path -= 1 else: - _path = 0 + _path = -2 # _____Total_____ try: _total = float(_total) except ValueError: print utils.mapping(_("Invalid Total Measure value in $1 "\ - "record, codes $2"), (_record_type, _codes)) - return + "record, codes $2. Total fixed to 0."), + (_record_type.encode("utf8"), _codes.encode("utf8"))) + _total = 0 # _____Measure lines_____ - _lines = _lines.split( "\\" ) + _lines = _lines.split(u"\\") _line_index = 0 _line_list = [ ] while _line_index < len(_lines)-6: _linetype = _lines[_line_index] - if _linetype == "": + if _linetype == u"": _linetype = 0 - elif _linetype == "1" or _linetype == "2" or \ - _linetype == "3": + elif _linetype == u"1" or _linetype == u"2" or \ + _linetype == u"3": _linetype = int(_linetype) else: _linetype = 0 @@ -1088,31 +1115,38 @@ if self.__pattern["formula"].match(_comment): print utils.mapping(_("The comment is not a formula or "\ "its have invalid characters, in the $1 record, "\ - "codes $2"), (_record_type, _codes)) + "codes $2"), (_record_type.encode("utf8"), _codes.encode("utf8"))) return else: - _formula = _comment + _formula = _comment.encode("utf8") _comment = "" else: _formula = "" + _comment = _comment.encode("utf8") _units = _lines[_line_index + 2] + _units = self.__pattern["no_float"].sub(u"", _units) _length = _lines[_line_index + 3] + _length = self.__pattern["no_float"].sub(u"", _length) _width = _lines[_line_index + 4] + _width = self.__pattern["no_float"].sub(u"", _width) _height = _lines[_line_index + 5] + _height = self.__pattern["no_float"].sub(u"", _height) + try: - if _units != "": _units = float(_units) - if _length != "": _length = float(_length) - if _width != "": _width = float(_width) - if _height != "": _height = float(_height) + if _units != u"": + _units = float(_units) + if _length != u"": _length = float(_length) + if _width != u"": _width = float(_width) + if _height != u"": _height = float(_height) except ValueError: - print utils.mapping("The measure values are not float "\ - "numbers, code $1", (_codes,)) + print utils.mapping(_("The measure values are not float "\ + "numbers, code $1"), (_codes.encode("utf8"),)) return _line_list.append([_linetype, _comment, _units, _length, _width, _height, _formula]) _line_index = _line_index + 6 - self.__budget.setTree(_parent_code, _child_code, _path, "", "", - _total, _line_list, _label, _record_type) + self.__budget.setTree(_parent_code, _child_code.encode("utf8"), _path, "", "", + _total, _line_list, _label.encode("utf8"), _record_type.encode("utf8")) self.num_valid_record = self.num_valid_record + 1 def _parseW(self, field_list): @@ -1134,9 +1168,9 @@ # _____Fields_____ _code_fields = field_list[0] # last \ is erased - if len(_code_fields) and _code_fields[-1] == "\\": + if len(_code_fields) and _code_fields[-1] == u"\\": _code_fields = _code_fields[:-1] - _code_fields = _code_fields.split("\\") + _code_fields = _code_fields.split(u"\\") _field_dict = {} _field_index = 0 while _field_index < len(_code_fields)-1: @@ -1148,8 +1182,8 @@ #"control": "[\t \n\r]" _field_code = self.delete_control_space(_field_code) # _____section_title_____ - if _field_code != "": - _field_dict[_field_code] = _field_title + if _field_code != u"": + _field_dict[_field_code.encode("utf8")] = _field_title.encode("utf8") _field_index = _field_index + 2 self.__budget.setSheetFields(_field_dict) self.num_valid_record = self.num_valid_record +1 @@ -1173,7 +1207,7 @@ if len(field_list) < 3: return _code = field_list[1] - if _code == "": + if _code == u"": # A: Section Titles # Any INFORMATION after last field separator is ignored # The record must have 3 fields @@ -1183,9 +1217,9 @@ # _____Fields_____ _section_codes = field_list[1] # last \ is erased - if len(_section_codes) and _section_codes[-1] == "\\": + if len(_section_codes) and _section_codes[-1] == u"\\": _section_codes = _section_codes[:-1] - _section_codes = _section_codes.split("\\") + _section_codes = _section_codes.split(u"\\") _section_dict = {} _section_index = 0 while _section_index < len(_section_codes)-1: @@ -1198,8 +1232,8 @@ _section_code = self.delete_control_space(_section_code) # _____section_title_____ _section_title = self.delete_control_space(_section_title) - if _section_code != "": - _section_dict[_section_code] = _section_title + if _section_code != u"": + _section_dict[_section_code.encode("utf8")] = _section_title.encode("utf8") _section_index = _section_index + 2 self.__budget.setSheetSections(_section_dict) self.num_valid_record = self.num_valid_record +1 @@ -1217,15 +1251,15 @@ # invalid characters are also erased _record_code = self.validateCode(_record_code) _scodes_text = field_list[1] - if _scodes_text == "": + if _scodes_text == u"": # TODO: rtf and html files - print "Html and rtf files not implemented in ~L record" + print "Html and rtf files not yet implemented in ~L record" else: # _____Section-code_Section-text_____ # last \ is erased - if len(_scodes_text) and _scodes_text[-1] == "\\": + if len(_scodes_text) and _scodes_text[-1] == u"\\": _scodes_text = _scodes_text[:-1] - _scodes_text = _scodes_text.split("\\") + _scodes_text = _scodes_text.split(u"\\") _paragraph_dict = {} _section_dict = {} _section_index = 0 @@ -1237,14 +1271,14 @@ # _____section_code_____ _section_code = self.delete_control_space(_section_code) # _____section_text_____ - if _section_code != "" and _section_text != "": + if _section_code != u"" and _section_text != u"": #-# paragraph #-# - _paragraph_code = _record_code + _section_code + "*" - _paragraph_dict[ _paragraph_code ] = _section_text - _section_dict[_section_code] = _paragraph_code + _paragraph_code = _record_code + _section_code + u"*" + _paragraph_dict[ _paragraph_code.encode("utf8") ] = _section_text.encode("utf8") + _section_dict[_section_code.encode("utf8")] = _paragraph_code.encode("utf8") _section_index = _section_index + 2 self.__budget.setSheetParagraphs(_paragraph_dict) - self.__budget.setSheetRecord(_record_code, "*", _section_dict) + self.__budget.setSheetRecord(_record_code.encode("utf8"), "*", _section_dict) self.num_valid_record = self.num_valid_record +1 def _parseQ(self, field_list): @@ -1273,9 +1307,9 @@ _record_code = self.validateCode(_record_code) _scodes_pkey = field_list[1] # last \ is erased - if len(_scodes_pkey) and _scodes_pkey[-1] == "\\": + if len(_scodes_pkey) and _scodes_pkey[-1] == u"\\": _scodes_pkey = _scodes_pkey[:-1] - _scodes_pkey = _scodes_pkey.split("\\") + _scodes_pkey = _scodes_pkey.split(u"\\") _field_dict = {} _section_index = 0 while _section_index < len(_scodes_pkey) -1: @@ -1291,21 +1325,21 @@ # _____Fields keys_____ _field_keys = self.delete_control_space(_field_keys) # last ; is erased - if len(_field_keys) and _field_keys[-1] == ";": + if len(_field_keys) and _field_keys[-1] == u";": _field_keys = _field_keys[:-1] - _field_keys_list = _scodes_pkey.split(";") + _field_keys_list = _scodes_pkey.split(u";") for _field_key in _field_keys_list: - if _field_key != "" and _section_code != "" and \ - _paragraph_key != "": + if _field_key != u"" and _section_code != u"" and \ + _paragraph_key != u"": if _field_key in _field_dict: _section_dict = _field_dict[_field_key] else: _section_dict = {} _field_dict[_field_key] = _section_dict - _section_dict[_section_code] = _paragraph_code + _section_dict[_section_code.encode("utf8")] = _paragraph_code.encode("utf8") _section_index = _section_index + 3 for _field, _section_dict in _field_dict.iteritems(): - self.__budget.setSheetRecord(_record_code, _field, _section_dict) + self.__budget.setSheetRecord(_record_code.encode("utf8"), _field.encode("utf8"), _section_dict) self.num_valid_record = self.num_valid_record +1 def _parseJ(self, field_list): @@ -1332,11 +1366,11 @@ _paragraph_code = self.delete_control_space(field_list[0]) # _____Paragraph text_____ _paragraph_text = field_list[1] - if _paragraph_text == "": + if _paragraph_text == u"": # TODO: rtf and html files - print "Html and rtf files not implemented in ~J record" + print "Html and rtf files not yet implemented in ~J record" else: - self.__budget.setSheetParagraph(paragraph_code, paragraph_text) + self.__budget.setSheetParagraph(paragraph_code.encode("utf8"), paragraph_text.encode("utf8")) self.num_valid_record = self.num_valid_record +1 def _parseG(self, field_list): @@ -1366,17 +1400,18 @@ _grafic_files = self.delete_control(field_list[1]) # _____subfields_____ # last \ is erased - if len(_grafic_files) and _grafic_files[-1] == "\\": + if len(_grafic_files) and _grafic_files[-1] == u"\\": _grafic_files = _grafic_files[:-1] - _grafic_file_list = _grafic_files.split("\\") + _grafic_file_list = _grafic_files.split(u"\\") _tested_grafic_file_list = [] for _grafic_file in _grafic_file_list: + _str_grafic_file = _grafic_file.encode("utf8") _path = os.path.dirname(self.__filename) - _grafic_file_path = os.path.join(_path, _grafic_file) + _grafic_file_path = os.path.join(_path, _str_grafic_file) if os.path.exists(_grafic_file_path): _tested_grafic_file_list.append(_grafic_file_path) else: - _name_ext = os.path.splitext(_grafic_file) + _name_ext = os.path.splitext(_str_grafic_file) _grafic_file_name = _name_ext[0] _grafic_file_ext = _name_ext[1] _grafic_file_name_u = _grafic_file_name.upper() @@ -1404,7 +1439,7 @@ (_grafic_file_path,)) if len(_grafic_file_list) > 0: for _grafic_file in _tested_grafic_file_list: - self.__budget.addFile(_record_code, _grafic_file, "img", "") + self.__budget.addFile(_record_code.encode("utf8"), _grafic_file, "img", "") self.num_valid_record = self.num_valid_record +1 def _parseE(self, field_list): @@ -1429,11 +1464,11 @@ # If there are no sufficient fields, the fields are added # with empty value:"" else: - field_list = field_list[1:] + [""]*(6-len(field_list)) + field_list = field_list[1:] + [u""]*(6-len(field_list)) # _____Fields_____ # _____company Code_____ _company_code = self.delete_control_space(field_list[0]) - if _company_code == "": + if _company_code == u"": return # _____Summary_____ @@ -1444,15 +1479,15 @@ _local_offices = self.delete_control(field_list[3]) # _____subfields of local_offices_____ # last \ is erased - if len(_local_offices) and _local_offices[-1] == "\\": + if len(_local_offices) and _local_offices[-1] == u"\\": _local_offices = _local_offices[:-1] - _local_offices_list = _local_offices.split("\\") + _local_offices_list = _local_offices.split(u"\\") # If there are no sufficent subfields, the subfields are added # whith empty value _nsub = len(_local_offices_list) % 10 if _nsub != 0: _local_offices_list = _local_offices_list + \ - [""]*(10-len(field_list)) + [u""]*(10-len(field_list)) _local_offices = [] _local_offices_index = 0 while _local_offices_index < len(_local_offices_list)-9: @@ -1466,39 +1501,44 @@ _country = _local_offices_list[_local_offices_index+6] _phone = _local_offices_list[_local_offices_index+7] # last ; is erased - if len(_phone) and _phone[-1] == ";": + if len(_phone) and _phone[-1] == u";": _phone = _phone[:-1] - _phone_list = _phone.split(";") + _phone_list = _phone.split(u";") + _phone_list = [_phone.encode("utf8") for _phone in _phone_list] _fax = _local_offices_list[_local_offices_index+8] # last ; is erased - if len(_fax) and _fax[-1] == ";": + if len(_fax) and _fax[-1] == u";": _fax = _fax[:-1] - _fax_list = _fax.split(";") + _fax_list = _fax.split(u";") + _fax_list = [_fax.encode("utf8") for _fax in _fax_list] _contact_person = _local_offices_list[_local_offices_index+9] - if _type != "" or _subname != "" or _address != "" or \ - _postal_code != "" or _town != "" or _province != "" or \ - _country != "" or _phone != "" or _fax != "" or \ - _contact_person != "": - _local_offices.append([_type, _subname, _address, - _postal_code, _town, _province, - _country, _phone_list, _fax_list, - _contact_person]) + if _type != u"" or _subname != u"" or _address != u"" or \ + _postal_code != u"" or _town != u"" or _province != u"" or \ + _country != u"" or _phone != u"" or _fax != u"" or \ + _contact_person != u"": + _local_offices.append([_type.encode("utf8"), _subname.encode("utf8"), + _address.encode("utf8"), _postal_code.encode("utf8"), + _town.encode("utf8"), _province.encode("utf8"), + _country.encode("utf8"), _phone_list, + _fax_list, _contact_person.encode("utf8")]) _local_offices_index = _local_offices_index + 10 # _____cif web email_____ _c_w_e = self.delete_control_space(field_list[4]) # last \ is erased - if len(_c_w_e) and _c_w_e[-1] == "\\": + if len(_c_w_e) and _c_w_e[-1] == u"\\": _c_w_e = _c_w_e[:-1] - _c_w_e_list = _c_w_e.split("\\") + _c_w_e_list = _c_w_e.split(u"\\") # _____subfields_____ # If there are no sufficient fields, the fields are added # with empty value:"" - _c_w_e_list = _c_w_e_list + [""]*(3-len(_c_w_e_list)) + _c_w_e_list = _c_w_e_list + [u""]*(3-len(_c_w_e_list)) _cif = _c_w_e_list[0] _web = _c_w_e_list[1] _email = _c_w_e_list[2] - self.__budget.setCompany(_company_code, _sumamary, _name, - _local_offices, _cif, _web, _email) + self.__budget.setCompany(_company_code.encode("utf8"), + _sumamary.encode("utf8"), _name.encode("utf8"), + _local_offices, _cif.encode("utf8"), + _web.encode("utf8"), _email.encode("utf8")) self.num_valid_record = self.num_valid_record +1 def _parseX(self, field_list): @@ -1527,33 +1567,33 @@ # "control": "[\t \n\r]" _field_1 = self.delete_control_space(field_list[0]) _field_2 = self.delete_control_space(field_list[1]) - if _field_1 == "": + if _field_1 == u"": # A) - _field_2_list = _field_2.split("\\") + _field_2_list = _field_2.split(u"\\") _ti_index = 0 while _ti_index < len(_field_2_list)-3: _ti_code = _field_2_list[_ti_index] _ti_description = _field_2_list[_ti_index+1] _ti_unit = _field_2_list[_ti_index+2] if _ti_code != "": - self.__budget.addTecInfo(_ti_code, _ti_description, - _ti_unit) + self.__budget.addTecInfo(_ti_code.encode("utf8"), _ti_description.encode("utf8"), + _ti_unit.encode("utf8")) _ti_index = _ti_index + 3 else: # B) # "#" and "##" characters at the end of the code are erased # invalid characters are also erased _record_code = self.validateCode(_field_1) - _field_2_list = _field_2.split("\\") + _field_2_list = _field_2.split(u"\\") _ti_index = 0 _ti_dict = {} while _ti_index < len(_field_2_list)-2: _ti_code = _field_2_list[_ti_index] _ti_value = _field_2_list[_ti_index+1] - if _ti_code != "" and _ty_value != "": - _ti_dict[_ti_code] = _ty_value + if _ti_code != u"" and _ty_value != u"": + _ti_dict[_ti_code.encode("utf8")] = _ty_value.encode("utf8") _ti_index = _ti_index + 2 - self.__budget.setTecnicalInformation(_record_code, _ti_dict) + self.__budget.setTecnicalInformation(_record_code.encode("utf8"), _ti_dict) self.num_valid_record = self.num_valid_record +1 def _parseF(self, field_list): @@ -1584,16 +1624,16 @@ _files = self.delete_control(field_list[1]) # _____subfields_____ # last \ is erased - if len(_files) and _files[-1] == "\\": + if len(_files) and _files[-1] == u"\\": _files = _files[:-1] - _files_list = _files.split("\\") + _files_list = _files.split(u"\\") # adding empty subfiels if necesary if len(_files_list)%3 > 0: - _files_list.extend[""]*(3 - len(_files_list)%3) + _files_list.extend[u""]*(3 - len(_files_list)%3) _file_index = 0 _tested_files_list = [] while _file_index < len(_files_list)-3: - _type = _files_list[_file_index].replace(" ","") + _type = _files_list[_file_index].replace(u" ",u"") ## _types = { ## "0": _("others"), ## "1": _("características técnicas y de fabricación"), @@ -1609,22 +1649,22 @@ ## "empresa"), ## "11": _("certificado/s de empresa"), ## "12": _("obras realizadas")} - _types = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", - "11", "12"] + _types = [u"0", u"1", u"2", u"3", u"4", u"5", u"6", u"7", u"8", u"9", u"10", + u"11", u"12"] if not _type in _types: - _type = "0" + _type = u"0" _filenames = _files_list[_file_index + 1] _description = _files_list[_file_index + 2] _file_index += 3 - if len(_filenames) and _filenames[-1] == ";": + if len(_filenames) and _filenames[-1] == u";": _files = _files[:-1] - _filenames_list = _files.split(";") + _filenames_list = _files.split(u";") _path = os.path.dirname(self.__filename) for _filename in filenames_list: - _file_path = os.path.join(_path, _filename) + _file_path = os.path.join(_path, _filename.encode("utf8")) if os.path.exists(_file_path): - _tested_files_list.append([_file_path, _type, - _description]) + _tested_files_list.append([_file_path, _type.encode("utf8"), + _description.encode("utf8")]) else: _name_ext = os.path.splitext(_filename) _file_name = _name_ext[0] @@ -1642,23 +1682,23 @@ _file_path_lu = os.path.join(_path, _lu) _file_path_ll = os.path.join(_path, _ll) if os.path.exists(_file_path_uu): - _tested_files_list.append([_file_path_uu, _type, - _description]) + _tested_files_list.append([_file_path_uu, _type.encode("utf8"), + _description.encode("utf8")]) elif os.path.exists(_grafic_file_path_ul): - _tested_files_list.append([_file_path_ul, _type, - _description]) + _tested_files_list.append([_file_path_ul, _type.encode("utf8"), + _description.encode("utf8")]) elif os.path.exists(_grafic_file_path_lu): - _tested_files_list.append([_file_path_lu, _type, - _description]) + _tested_files_list.append([_file_path_lu, _type.encode("utf8"), + _description.encode("utf8")]) elif os.path.exists(_grafic_file_path_ll): - _tested_files_list.append([_file_path_ll, _type, - _description]) + _tested_files_list.append([_file_path_ll, _type.encode("utf8"), + _description.encode("utf8")]) else: print utils.mapping(_("The file $1 do not exist"), (_file_path,)) if len(_tested_files_list) > 0: for _file in _tested_file_list: - self.__budget.addFile(_record_code, _file[0], file[1], file[2]) + self.__budget.addFile(_record_code.encode("utf8"), _file[0], file[1], file[2]) self.num_valid_record = self.num_valid_record +1 def _parseB(self, field_list): @@ -1718,13 +1758,13 @@ # _____Labels_____ # last \ is erased # TODO: change the others parsers to this: - while len(_labels) > 0 and _labels[-1] == "\\": + while len(_labels) > 0 and _labels[-1] == u"\\": _labels = _labels[:-1] # replace "_" to " " - _labels = _labels.replace("_"," ") - _label_list = _labels.split("\\") + _labels = _labels.replace(u"_",u" ") + _label_list = _labels.split(u"\\") for _label in _label_list: - self.__budget.addLabel(_code, _label) + self.__budget.addLabel(_code.encode("utf8"), _label.encode("utf8")) self.num_valid_record = self.num_valid_record + 1 def _parseP(self, field_list): @@ -1745,13 +1785,13 @@ if len(field_list) > 2: # delete control caracters and spaces _family_code = self.delete_control_space(field_list[1]) - if _family_code == "": # A)Global paremetric record + if _family_code == u"": # A)Global paremetric record # The record must have 3 or 4 fields if len(field_list) > 4: field_list = field_list[0:4] field_list = field_list[1:] if len(field_list) == 2: - field_list.append("") + field_list.append(u"") if len(field_list) != 3: return else: # B)Family Parametric record @@ -1766,16 +1806,16 @@ return # _____Description_____ _description = field_list[1] - if _description == "": + if _description == u"": print _("PyArq hates parametric DLLs") return # Adding last end of line - _description = _description + "\r\n" + _description = _description + u"\r\n" # Delete comments # "comment" : "#.*\r\n" - _description = self.__pattern["comment"].sub("\r\n",_description) + _description = self.__pattern["comment"].sub(u"\r\n",_description) # Tabs to spaces - _description = _description.replace("\t"," ") + _description = _description.replace(u"\t",u" ") # Delete empty lines # "empty_line": r"(\r\n) *\r\n" while self.__pattern["empty_line"].search(_description): @@ -1784,12 +1824,12 @@ # Delete spaces before and after / # "space_before_backslash" : r"( )+\\" _description = self.__pattern["space_before_backslash"].sub( - r"\\",_description) + ur"\\",_description) # "space_after_backslash" : r"\\( )+" _description = self.__pattern["space_after_backslash"].sub( - r"\\",_description) + ur"\\",_description) # Join lines that start but not end with / - _description = "\r\n" + _description # add leading end of line + _description = u"\r\n" + _description # add leading end of line # "start_noend_backslash": "(\r\n\\\.*[^\\\])\r\n" while self.__pattern["start_noend_backslash"].search(_description): _description = self.__pattern["start_noend_backslash"].sub( @@ -1805,8 +1845,8 @@ lambda x: x.groups()[0], _description) _description = _description[2:] # remove leading end of line #_description = re.sub(r"\\( )+",r"\\",_description) - _lines = _description.split("\r\n") - _final_description = "" + _lines = _description.split(u"\r\n") + _final_description = u"" _pass_line = 0 for index in range(len(_lines)): _line = _lines[index] @@ -1814,44 +1854,44 @@ if len(_line) != 0: # Delete empty lines if _pass_line > 0: _pass_line = _pass_line -1 - _line = "" + _line = u"" elif _line.isspace(): - _line = "" - elif _line[0] != "\\": + _line = u"" + elif _line[0] != u"\\": # Delete spaces out "" delimiter - _list = _line.split('"') - _final_line = "" + _list = _line.split(u'"') + _final_line = u"" for index1 in range(len(_list)): if index1 % 2 != 0: - _parcial_line = '"' + _list[index1] + _parcial_line = u'"' + _list[index1] else: - _parcial_line = '"' + _list[index1].replace(" ","") + _parcial_line = u'"' + _list[index1].replace(u" ",u"") _final_line = _final_line + _parcial_line _line = _final_line[1:] _lines[index] = _line # parse data - if len(_line) > 2 and _line[:2] == "::": + if len(_line) > 2 and _line[:2] == u"::": # Delete spaces out " delimiter #print "__PRECIO__" + _line[2:] pass - elif len(_line) > 2 and _line[:2] == "%:": + elif len(_line) > 2 and _line[:2] == u"%:": # Delete spaces out " delimiter #print "__%AUX__" + _line[2:] pass - elif len(_line) > 3 and _line[:2] == "%%:": + elif len(_line) > 3 and _line[:2] == u"%%:": # Delete spaces out " delimiter #print "__%%AUX__" + _line[2:] pass elif self.__pattern["var"].search(_line): # Delete spaces out " delimiter #print "line =", _line - while _line.count('"') % 2 == 1 and \ + while _line.count(u'"') % 2 == 1 and \ index + _pass_line + 1 < len(_lines) -1: _line = _line + _lines[index + _pass_line + 1] _pass_line = _pass_line + 1 _search = self.__pattern["var"].search(_line) if _search is not None: - _var = _search.groups()[0] + " = " + _search.groups()[1] + _var = _search.groups()[0] + u" = " + _search.groups()[1] #print "__VAR__" + str(_var) pass else: @@ -1862,67 +1902,67 @@ #_patern = "(^[^:]*):(.*)$" _search = self.__pattern["descomposition"].search(_line) if _search is not None: - _var = _search.groups()[0] + ":" + _search.groups()[1] + _var = _search.groups()[0] + u":" + _search.groups()[1] #print "__Descomposición__" + str(_var) pass else: #print "no __Descomposición__", _line pass else: - print "Parametric: code: " + _family_code + print "Parametric: code: " + _family_code.encode("utf8") print "******* Desconocido *** : " + _line - if index-10 > 0: print "-11 :", _lines[index-11] - if index-10 > 0: print "-10 :", _lines[index-10] - if index-9 > 0: print "-9 :", _lines[index-9] - if index-8 > 0: print "-8 :", _lines[index-8] - if index-7 > 0: print "-7 :", _lines[index-7] - if index-6 > 0: print "-6 :", _lines[index-6] - if index-5 > 0: print "-5 :", _lines[index-5] - if index-4 > 0: print "-4 :", _lines[index-4] - if index-3 > 0: print "-3 :", _lines[index-3] - if index-2 > 0: print "-2 :", _lines[index-2] - if index-1 > 0: print "-1 :", _lines[index-1] + if index-10 > 0: print "-11 :", _lines[index-11].encode("utf8") + if index-10 > 0: print "-10 :", _lines[index-10].encode("utf8") + if index-9 > 0: print "-9 :", _lines[index-9].encode("utf8") + if index-8 > 0: print "-8 :", _lines[index-8].encode("utf8") + if index-7 > 0: print "-7 :", _lines[index-7].encode("utf8") + if index-6 > 0: print "-6 :", _lines[index-6].encode("utf8") + if index-5 > 0: print "-5 :", _lines[index-5].encode("utf8") + if index-4 > 0: print "-4 :", _lines[index-4].encode("utf8") + if index-3 > 0: print "-3 :", _lines[index-3].encode("utf8") + if index-2 > 0: print "-2 :", _lines[index-2].encode("utf8") + if index-1 > 0: print "-1 :", _lines[index-1].encode("utf8") print "-0 :", _lines[index-0] pass else: - _parameter_list = _line.split("\\")[1:-1] + _parameter_list = _line.split(u"\\")[1:-1] if len(_parameter_list) >= 2: - if _parameter_list[0] == "C" or \ - _parameter_list[0] == "COMENTARIO": + if _parameter_list[0] == u"C" or \ + _parameter_list[0] == u"COMENTARIO": #print "__COMENTARIO__" + _parameter_list[1] self.__budget.setParametricSelectComment( - _family_code, _parameter_list[1]) - elif _parameter_list[0] == "R" or \ - _parameter_list[0] == "RESUMEN": + _family_code.encode("utf8"), _parameter_list[1].encode("utf8")) + elif _parameter_list[0] == u"R" or \ + _parameter_list[0] == u"RESUMEN": #print "__RESUMEN__" + _parameter_list[1] - self.__budget.setParametricSummary(_family_code, - _parameter_list[1]) - elif _parameter_list[0] == "T" or \ - _parameter_list[0] == "TEXTO": + self.__budget.setParametricSummary(_family_code.encode("utf8"), + _parameter_list[1].encode("utf8")) + elif _parameter_list[0] == u"T" or \ + _parameter_list[0] == u"TEXTO": #print "__TEXTO__" + _parameter_list[1] - self.__budget.setParametricText(_family_code, - _parameter_list[1]) - elif _parameter_list[0] == "P" or \ - _parameter_list[0] == "PLIEGO": + self.__budget.setParametricText(_family_code.encode("utf8"), + _parameter_list[1].encode("utf8")) + elif _parameter_list[0] == u"P" or \ + _parameter_list[0] == u"PLIEGO": #print "__PLIEGO__" + str(_parameter_list[1:]) pass - elif _parameter_list[0] == "K" or \ - _parameter_list[0] == "CLAVES": + elif _parameter_list[0] == u"K" or \ + _parameter_list[0] == u"CLAVES": #print "__CLAVES__" + str(_parameter_list[1:]) pass - elif _parameter_list[0] == "F" or \ - _parameter_list[0] == "COMERCIAL": + elif _parameter_list[0] == u"F" or \ + _parameter_list[0] == u"COMERCIAL": #print "__COMERCIAL__" + str(_parameter_list[1:]) pass else: #print "==PARAMETRO==" + str(_parameter_list[:]) pass - _final_description = _final_description + _line + "\r\n" + _final_description = _final_description + _line + u"\r\n" #print _line # Delete last empty line _description = _final_description[:-2] - _lines = _description.split("\r\n") + _lines = _description.split(u"\r\n") for _line in _lines: pass #print _line @@ -1995,31 +2035,30 @@ # remove leading spaces if _version in self.__character_sets_dict: self.__character_set = self.__character_sets_dict[_version] + print utils.mapping(_("FIEBDC character encoding: $1"),(self.__character_set,)) else: - print utils.mapping(_("This codepage do not exist in "\ - "FIEBDC3! Default codepage: $1"), + print utils.mapping(_("This Character encoding do not exist in "\ + "FIEBDC3! Default Character encoding: $1"), (self.__character_set,)) else: - print utils.mapping(_("This V record dot have a codepage! "\ - "Default codepage: $1"), + print utils.mapping(_("This V record dot have a character encoding! "\ + "Default character encoding: $1"), (self.__character_set,)) else: - print utils.mapping(_("Not 'V' record in File! Default codepage: "\ + print utils.mapping(_("Not 'V' record in File! Default character encoding: "\ "$1"), (self.__character_set,)) - if self.__character_set != "utf8": - _buffer = unicode(_buffer, self.__character_set) - _buffer = _buffer.encode("utf8") + _buffer = unicode(_buffer, self.__character_set) # Any INFORMATION between the beginning of the file and the # beginning of the first registry “~” is ignored #"after_first_tilde" : "^[^~]*~" _buffer = self.__pattern["after_first_tilde"].sub("",_buffer) - while _buffer != "" and not self.__cancel: + while _buffer != u"" and not self.__cancel: #-# the blank characters (32), tabs (9) and end of line (13 and 10) # before the separators '~', '|' are erased. # Before separator \ not deleted because it affects the reading of # the record ~P _buffer = self.eraseControlCharacters(_buffer) - _record_list = _buffer.split("~") + _record_list = _buffer.split(u"~") # The last record can be incomplete unless it is the last one of # the file if len(_record_list) > 1: @@ -2030,18 +2069,16 @@ # The blank characters (32), tabs (9) and end of line # (13 and 10) at the end of the file are ignored. #"end_control" : "((\r\n)| |\t)+$" - _record_list[-1] = self.__pattern["end_control"].sub("", + _record_list[-1] = self.__pattern["end_control"].sub(u"", _record_list[-1]) - _last_record = "" + _last_record = u"" for record in _record_list: if self.__cancel: break self.parseRecord(record) interface.progress(_file.tell() / _filesize) _buffer2 = _file.read(100000) - if self.__character_set != "utf8": - _buffer2 = unicode(_buffer2, self.__character_set) - _buffer2 = _buffer2.encode("utf8") + _buffer2 = unicode(_buffer2, self.__character_set) _buffer = _last_record + _buffer2 _file.close() if self.__cancel: @@ -2115,11 +2152,11 @@ def delete_control_space(self, text): text = self.delete_control(text) - text = text.replace(" ", "") + text = text.replace(u" ", u"") return text def delete_control(self, text): - text = text.replace("\t", "") - text = text.replace("\r", "") - text = text.replace("\n", "") + text = text.replace(u"\t", u"") + text = text.replace(u"\r", u"") + text = text.replace(u"\n", u"") return text