VerityPy 1.1
Python library for Verity data profiling, quality control, remediation
numfuncs.py
Go to the documentation of this file.
1#!/usr/bin/env python
2"""
3Number Functions
4
5various worker functions to manipulate numbers
6"""
7
8__all__ = ['convert_mainframe',
9 'get_value_from_suspect_exp',
10 'is_int',
11 'is_real',
12 'is_int_get',
13 'is_real_get',
14 'clean_number',
15 ]
16
17__version__ = '1.0'
18__author__ = 'Geoffrey Malafsky'
19__email__ = 'gmalafsky@technikinterlytics.com'
20__date__ = '20240614'
21
22
def is_int(valnum: str) -> bool:
    """
    Checks if string is integer number. Returns bool

    valnum: input string to check. A value that is not a str (None, int, list, ...)
        is reported as False instead of raising AttributeError.
    Delegates the check to is_int_get with typ 'bool'.
    """
    if not isinstance(valnum, str):
        return False
    try:
        # 'is True' guarantees a real bool even if the helper hands back a
        # non-bool error marker such as the string 'false:error'
        return is_int_get(valnum, "bool") is True
    except (RuntimeError, ValueError):
        return False
33
def is_real(valnum: str) -> bool:
    """
    Checks if string is real number. Returns bool

    valnum: input string to check. A value that is not a str (None, float, list, ...)
        is reported as False instead of raising AttributeError.
    Delegates the check to is_real_get with typ 'bool'.
    """
    if not isinstance(valnum, str):
        return False
    try:
        # 'is True' guarantees a real bool even if the helper hands back a
        # non-bool error marker such as the string 'false:error'
        return is_real_get(valnum, "bool") is True
    except (RuntimeError, ValueError):
        return False
44
def is_int_get(valnum: str, typ: str = "", remove_front_chars: bool = False):
    """
    Checks if string is integer number.

    valnum: input string to check. Case and surrounding whitespace are ignored.
        A leading - or enclosing parentheses mark a negative value, a leading + is dropped.
    typ: optional return type (string, number, bool). Anything else is treated as bool.
    remove_front_chars: optional. bool whether non-numeric chars as prefix will be removed
        and therefore not declare non-numeric. Characters after the digits are
        never removed and make the value non-numeric.
    Return depends on typ. Default is bool.
        bool: True if integer otherwise False
        number: converted integer or -999999 if error
        string: string of integer or false:reason if not. Reason will
            be detected issue such as decimal (has decimal point),
            empty, non-numeric, error (input is not a string or the
            exponent helper failed)
    The return type always follows typ, including on errors.
    """
    failnum = -999999
    kind = typ.lower().strip() if isinstance(typ, str) else ""
    if kind not in ("bool", "number", "string"):
        kind = "bool"

    reason = ""
    resultnum = failnum
    isneg = False
    if not isinstance(valnum, str):
        reason = "error"
    else:
        text = valnum.lower().strip()
        if text.startswith("-"):
            text = text[1:]
            isneg = True
        elif text.startswith("(") and text.endswith(")"):
            text = text[1:-1]
            isneg = True
        elif text.startswith("+"):
            text = text[1:]

        digits = ""
        if len(text) == 0:
            reason = "empty"
        elif any(x in text for x in ("e+", "e-")):
            # signed exponent notation is expanded by the helper first
            try:
                digits = get_value_from_suspect_exp(text)
            except (RuntimeError, ValueError, ArithmeticError):
                reason = "error"
            else:
                if len(digits) == 0 or digits.startswith("notok:") or digits == text:
                    reason = "non-numeric"
                elif "." in digits:
                    reason = "decimal"
        elif "." in text:
            reason = "decimal"
        elif not remove_front_chars:
            digits = text
        else:
            # skip everything before the first digit, then require digits only.
            # The letter e gets no special treatment: it cannot be part of an integer.
            nstart = -1
            for pos, charstr in enumerate(text):
                if charstr.isdigit():
                    digits += charstr
                    if nstart < 0:
                        nstart = pos
                elif nstart > -1:
                    reason = "non-numeric"
                    break
            if nstart < 0:
                reason = "non-numeric"

        if not reason:
            if "_" in digits:
                # int() would accept 1_000 as a Python literal; data values must not
                reason = "non-numeric"
            else:
                try:
                    resultnum = int(digits)
                except ValueError:
                    reason = "non-numeric"

    if reason:
        if kind == "bool":
            return False
        if kind == "number":
            return failnum
        return "false:" + reason
    if isneg:
        resultnum *= -1
    if kind == "bool":
        return True
    if kind == "number":
        return resultnum
    return str(resultnum)
157
def is_real_get(valnum: str, typ: str = "", remove_front_chars: bool = False):
    """
    Checks if string is real number.

    valnum: input string to check. Case and surrounding whitespace are ignored.
        A leading - or enclosing parentheses mark a negative value, a leading + is dropped.
    typ: optional return type (string, number, bool). Anything else is treated as bool.
    remove_front_chars: optional. bool whether non-numeric chars as prefix will be removed
        and therefore not declare non-numeric. Characters after the number are
        never removed and make the value non-numeric.
    Return depends on typ. Default is bool.
        bool: True if real otherwise False
        number: converted real or -999999 if error
        string: string of real or false:reason if not. Reason will
            be detected issue such as empty, non-numeric, duplicate decimal,
            error (input is not a string or the exponent helper failed)
    Values that parse to nan or infinity are reported as non-numeric.
    The return type always follows typ, including on errors.
    """
    import math

    failnum = -999999
    kind = typ.lower().strip() if isinstance(typ, str) else ""
    if kind not in ("bool", "number", "string"):
        kind = "bool"

    reason = ""
    resultnum = failnum
    isneg = False
    if not isinstance(valnum, str):
        reason = "error"
    else:
        text = valnum.lower().strip()
        if text.startswith("-"):
            text = text[1:]
            isneg = True
        elif text.startswith("(") and text.endswith(")"):
            text = text[1:-1]
            isneg = True
        elif text.startswith("+"):
            text = text[1:]

        numtext = ""
        if len(text) == 0:
            reason = "empty"
        elif any(x in text for x in ("e+", "e-")):
            # signed exponent notation is expanded by the helper first
            try:
                numtext = get_value_from_suspect_exp(text)
            except (RuntimeError, ValueError, ArithmeticError):
                reason = "error"
            else:
                if len(numtext) == 0 or numtext.startswith("notok:") or numtext == text:
                    reason = "non-numeric"
        elif not remove_front_chars:
            numtext = text
        else:
            chars = ""
            nstart = -1
            nperiod = -1
            has_exp = False
            for pos, charstr in enumerate(text):
                if charstr.isdigit():
                    chars += charstr
                    if nstart < 0:
                        nstart = pos
                elif charstr == ".":
                    if nperiod < 0:
                        chars += charstr
                        nperiod = pos
                    else:
                        reason = "duplicate decimal"
                        break
                elif charstr == "e" and nstart > -1 and not has_exp:
                    # an e can only start an exponent once digits were seen;
                    # before that it is an ordinary prefix character
                    has_exp = True
                elif nstart > -1:
                    reason = "non-numeric"
                    break
            if nstart < 0:
                reason = "non-numeric"
            elif has_exp:
                # parse from where the number begins so a removed prefix
                # does not break the exponent form
                first = nperiod if 0 <= nperiod < nstart else nstart
                numtext = text[first:]
            else:
                numtext = chars

        if not reason:
            if "_" in numtext:
                # float() would accept 1_000.5 as a Python literal; data values must not
                reason = "non-numeric"
            else:
                try:
                    resultnum = float(numtext)
                except ValueError:
                    reason = "non-numeric"
                else:
                    if not math.isfinite(resultnum):
                        reason = "non-numeric"

    if reason:
        if kind == "bool":
            return False
        if kind == "number":
            return failnum
        return "false:" + reason
    if isneg:
        resultnum *= -1
    if kind == "bool":
        return True
    if kind == "number":
        return resultnum
    # str() of a finite float always carries a decimal point or an exponent,
    # so nothing is appended (appending .0 to 1e+20 would corrupt it)
    return str(resultnum)
270
def convert_mainframe(valnum: str) -> str:
    """
    Convert MainFrame formatted number string

    Converts a string representing a main frame formatted number with an encoded last character
    into a string of a real along with sign reversal if necessary.
    Always makes last 2 digits into decimal portion so no further divide by 100 is necessary
    (unless the text before the code already holds a decimal point). If special char is
    within input string it becomes the end char and the remaining suffix is discarded.
    Leading zeros are truncated so 000.12 becomes 0.12 . Codes are:
    {= 0
    }= 0 and negate
    a= 1
    j= 1 and negate
    b= 2
    k= 2 and negate
    c= 3
    l= 3 and negate
    d= 4
    m= 4 and negate
    e= 5
    n= 5 and negate
    f= 6
    o= 6 and negate
    g= 7
    p= 7 and negate
    h= 8
    q= 8 and negate
    i= 9
    r= 9 and negate
    Codes are looked up in the order listed above and the first one present in the
    string is used. Input is trimmed and lower cased before processing.
    Return result or starts with 'notok:' if error. If no special char found (or the
    value is shorter than 2 characters) the trimmed, lower cased input is returned
    with its leading minus sign kept.
    """
    import math
    from decimal import Decimal

    # code char -> (digit, negate); insertion order is the search priority
    codes = {
        "{": ("0", False), "}": ("0", True),
        "a": ("1", False), "j": ("1", True),
        "b": ("2", False), "k": ("2", True),
        "c": ("3", False), "l": ("3", True),
        "d": ("4", False), "m": ("4", True),
        "e": ("5", False), "n": ("5", True),
        "f": ("6", False), "o": ("6", True),
        "g": ("7", False), "p": ("7", True),
        "h": ("8", False), "q": ("8", True),
        "i": ("9", False), "r": ("9", True),
    }
    str_out = ""
    try:
        str_in = valnum.strip().lower()
        if len(str_in) == 0:
            raise ValueError("str_in is empty")
        signtyp = ""
        if str_in.startswith("-"):
            signtyp = "-"
            str_in = str_in[1:]

        n1 = -1
        if len(str_in) >= 2:
            for code in codes:
                n1 = str_in.find(code)
                if n1 >= 0:
                    break
        if n1 < 0:
            # nothing to decode: hand the value back without losing its sign
            return signtyp + str_in

        digit, chg_sign = codes[str_in[n1]]
        str_out = str_in[:n1] + digit
        if len(str_out) >= 2 and "." not in str_out:
            str_out = str_out[:-2] + "." + str_out[-2:]
        if chg_sign:
            signtyp = "" if signtyp == "-" else "-"

        dval = is_real_get(str_out, "number", True)
        str_out = "0.00"
        if isinstance(dval, (int, float)) and dval != -999999 and math.isfinite(dval):
            # positional formatting keeps large values readable instead of
            # falling into exponent notation; pad to at least 2 decimals
            intpart, _, decpart = format(Decimal(str(dval)), "f").partition(".")
            str_out = intpart + "." + decpart.ljust(2, "0")
        if signtyp == "-":
            str_out = signtyp + str_out
    except (RuntimeError, ValueError, ArithmeticError) as err:
        str_out = "notok:" + str(err)
    return str_out
455
def get_value_from_suspect_exp(valnum: str) -> str:
    """
    Get Value From Suspected Exponential

    Check string to see if it is an exponential number (mantissa, the letter e,
    exponent) which is extracted into real number if so. A leading - or enclosing
    parentheses mark a negative value.
    Returns string of number if converted or original string. Starts with notok: if error
    (including a value too large to represent and input that is not a string).
    Converted values are written without exponent notation: whole numbers have no
    decimal point, a single decimal digit is padded to two (2.5 -> 2.50).
    """
    import math
    from decimal import Decimal

    result = ""
    try:
        result = valnum
        orignum = valnum.lower()
        isnegval = False
        if orignum.startswith("-"):
            isnegval = True
            orignum = orignum[1:]
        elif orignum.startswith("(") and orignum.endswith(")"):
            isnegval = True
            orignum = orignum[1:-1]
        isexpneg = "e-" in orignum

        # split at the first e; both sides must be real numbers
        numstr, sep, exppart = orignum.partition("e")
        if not sep or len(exppart) == 0 or not is_real(exppart) or not is_real(numstr):
            return result

        dval = is_real_get(numstr, "number", True)
        if exppart.startswith("+") or exppart.startswith("-"):
            exppart = exppart[1:]
        numexp = 1
        if len(exppart) > 0:
            dexp = is_real_get(exppart, "number", True)
            if dexp != -999999:
                if isexpneg:
                    dexp *= -1
                numexp = 10 ** dexp  # raises OverflowError for very large exponents
        dval *= numexp
        if not math.isfinite(dval):
            raise OverflowError("value out of range")

        # Decimal gives positional text even where str(float) would switch
        # to exponent notation (e.g. 1e+20 or 1.5e-07)
        text = format(Decimal(repr(dval)), "f")
        intpart, dot, decpart = text.partition(".")
        if not dot or decpart.strip("0") == "":
            result = intpart
        elif len(decpart) == 1:
            result = intpart + "." + decpart + "0"
        else:
            result = text
        if isnegval:
            result = "-" + result
    except (RuntimeError, ValueError, OSError, ArithmeticError, AttributeError, TypeError) as err:
        result = "notok:" + str(err)
    return result
533
def clean_number(valnum: str) -> str:
    """
    Cleans non-numeric prefix and suffix characters from number.
    Enclosing parens which is interpreted as negative indicator and replaced with -,
    while leading + is removed. A - found while scanning (for example a trailing
    minus as in 100-) also marks the value negative.
    Input is trimmed and lower cased first. A value that is already a finite number
    is returned as typed (apart from sign handling). An exponent (digits, e, digits)
    found behind a removed prefix is converted with float so 1e3 becomes 1000.0 .
    Scanning gives up (empty result) when the number has not ended within 50 characters.
    returns empty string if no number can be extracted (including nan and infinity),
    or string starting with notok: if error
    """
    import math

    resultstr = ""
    try:
        text = valnum.lower().strip()
        isneg = False
        if text.startswith("-"):
            text = text[1:]
            isneg = True
        elif text.startswith("(") and text.endswith(")"):
            text = text[1:-1]
            isneg = True
        elif text.startswith("+"):
            text = text[1:]

        failed = False
        isok = False
        if len(text) == 0:
            failed = True
        elif "_" not in text:
            # fast path: already a plain number. Underscore literals such as 1_000
            # and nan/inf are accepted by float() but are not clean data values.
            try:
                isok = math.isfinite(float(text))
            except ValueError:
                isok = False
            if isok:
                resultstr = text

        if not isok and not failed:
            if any(x in text for x in ("e+", "e-")):
                txt = get_value_from_suspect_exp(text)
                if len(txt) == 0 or txt.startswith("notok:") or txt == text:
                    failed = True
                else:
                    try:
                        isok = math.isfinite(float(txt))
                    except ValueError:
                        isok = False
                    if isok:
                        resultstr = txt
                    else:
                        failed = True
            else:
                mantissa = ""
                exponent = ""
                nstart = -1
                ndec = -1
                seen_digit = False
                in_exp = False
                for pos, charstr in enumerate(text):
                    if pos >= 50:
                        failed = True
                        break
                    if charstr.isdigit():
                        if in_exp:
                            exponent += charstr
                        else:
                            mantissa += charstr
                        seen_digit = True
                        if nstart < 0:
                            nstart = pos
                    elif charstr == "-":
                        isneg = True
                        if nstart < 0:
                            nstart = pos
                        else:
                            break
                    elif charstr == ".":
                        if ndec < 0 and not in_exp:
                            mantissa += charstr
                            ndec = pos
                        else:
                            failed = True
                            break
                    elif (charstr == "e" and seen_digit and not in_exp
                          and text[pos + 1:pos + 2].isdigit()):
                        # e between digits is an exponent; any other e is just
                        # part of the prefix or suffix text
                        in_exp = True
                    elif nstart > -1:
                        break
                if nstart < 0:
                    failed = True

                if not failed and len(mantissa) > 0:
                    try:
                        dval = float(mantissa + ("e" + exponent if in_exp else ""))
                    except ValueError:
                        failed = True
                    else:
                        if not math.isfinite(dval):
                            failed = True
                        elif in_exp:
                            resultstr = str(dval)
                        else:
                            resultstr = mantissa

        if failed:
            resultstr = ""
        elif isneg and len(resultstr) > 0 and resultstr != "0" and not resultstr.startswith("-"):
            resultstr = "-" + resultstr
    except (RuntimeError, ValueError, ArithmeticError, AttributeError, TypeError) as err:
        resultstr = "notok:" + str(err)
    return resultstr
bool is_int(str valnum)
Definition numfuncs.py:23
str get_value_from_suspect_exp(str valnum)
Definition numfuncs.py:456
str convert_mainframe(str valnum)
Definition numfuncs.py:271
bool is_real(str valnum)
Definition numfuncs.py:34
is_real_get(str valnum, str typ="", bool remove_front_chars=False)
Definition numfuncs.py:158
is_int_get(str valnum, str typ="", bool remove_front_chars=False)
Definition numfuncs.py:45
str clean_number(str valnum)
Definition numfuncs.py:534