VerityPy 1.1
Python library for Verity data profiling, quality control, remediation
field.py
Go to the documentation of this file.
1"""
2Field and CoValue objects
3"""
4
5__all__ = ['Field','CoValue']
6__version__ = '1.0'
7__author__ = 'Geoffrey Malafsky'
8__email__ = 'gmalafsky@technikinterlytics.com'
9__date__ = '20240627'
10
11
class Field:
    """
    Field object with attributes for title, datatype, formatting.

    Attributes:
        title: field name. Cannot use special characters.
        datatype: field datatype (string, int, real, bool, date).
        fmt_strcase: for datatype=string, optionally specifies a value is
            upper or lower case (upper, lower, ""). Defaults to "".
        fmt_strlen: for datatype=string, optionally specifies a required
            integer length (1-n). If value < 1 then this is ignored.
            Defaults to -1.
        fmt_strcut: for datatype=string when strlen>0 and the value is longer
            than strlen, chars are removed from either front or back
            (front, back). Default is back.
        fmt_strpad: for datatype=string when strlen>0 and the value is shorter
            than strlen, chars are added to either front or back
            (front, back). Default is back.
        fmt_strpadchar: for datatype=string, the character used for padding.
            Must be 1 character or one of the names
            (space, fslash, bslash, tab). Default is _
        fmt_decimal: for datatype=real, optionally specifies a required
            integer number of decimal places (0-n). Defaults to -1, mirroring
            the "unset" sentinel used by fmt_strlen.
            NOTE(review): confirm consumers treat a negative value as unset.
        fmt_date: for datatype=date, optionally specifies a required date
            format as one of-
            mmddyy, mmdyy, mdyy, mmddyyyy, mmdyyyy, mdyyyy,
            ddmmyy, ddmyy, dmyy, ddmmyyyy, ddmyyyy, dmyyyy,
            yymmdd, yymmd, yymd, yyyymmdd, yyyymmd, yyyymd,
            yyyyddd (3 digit day number within year),
            yyyyMMMdd, ddMMMyyyy (MMM = 3 letter month title like 'JAN'),
            'MONTHdd,yyyy', 'ddMONTH,yyyy', yyyyMONTHdd, ddMONTHyyyy,
            yyMONTHdd, ddMONTHyy (MONTH full title),
            *dmmyyyy, mm*dyyyy, *mddyyyy, dd*myyyy (*= can be 1 or 2 characters)
        mapto: when using this Field as an output (i.e. target) field then
            this specifies if it is mapped to a source field, which enables
            both renaming source fields and adding enrichment fields.
        parse_error_action: Handle empty field values due to parsing errors.
            Used in Refining Data as:
            a) value to assign like 'NA' to denote parse error
            b) set to '-ignore-' which causes the field value to remain as
               empty since no transform nor normalize will be done
            c) set to either '-use-' or '' which causes the empty field value
               to continue to transform and normalization routines. Note the
               transform functions ifEmpty and ifNotEmpty can be used to set
               field specific values.
            Defaults to ''.
    """

    def __init__(self, title:str) -> None:
        self.title = title
        self.datatype = ""
        # Every attribute that get_json() reads is initialized here so a
        # freshly constructed Field can always be serialized.
        self.fmt_strcase = ""
        self.fmt_strlen = -1
        self.fmt_strcut = "back"
        self.fmt_strpad = "back"
        self.fmt_strpadchar = "_"
        self.fmt_decimal = -1
        self.fmt_date = ""
        self.mapto = ""
        self.parse_error_action = ""

    def get_json(self) -> str:
        """
        Make JSON of the field object.

        Returns:
            A compact JSON string (no whitespace between tokens) shaped as
            {"field":{"title":"xxxx","datatype":"xxxxx","strcase":fmt_strcase,"strlen":fmt_strlen....}}
            The numeric settings (strlen, decimal) are emitted as quoted
            strings. String values are escaped by the json module, so quotes
            and backslashes in a value still yield valid JSON.
        """

        # Function-scope import keeps this block self-contained.
        import json

        # Insertion order of this dict is the key order of the output.
        payload = {
            "title": self.title,
            "datatype": self.datatype,
            "strcase": self.fmt_strcase,
            "strlen": str(self.fmt_strlen),
            "strcut": self.fmt_strcut,
            "strpad": self.fmt_strpad,
            "strpadchar": self.fmt_strpadchar,
            "decimal": str(self.fmt_decimal),
            "date": self.fmt_date,
            "mapto": self.mapto,
            "parserroraction": self.parse_error_action,
        }
        # Compact separators reproduce the original hand-built layout;
        # ensure_ascii=False keeps non-ASCII characters as typed.
        return json.dumps({"field": payload}, separators=(",", ":"), ensure_ascii=False)
75
76
77
class CoValue:
    """
    CoValue object to define 2 or 3 fields for joint value analysis

    * title: title which is concatenation of field titles using _ to join them
    * field1: required first field title
    * field2: required second field title
    * field3: optional third field title
    * field1_index: first field's array index assigned by function
    * field2_index: second field's array index assigned by function
    * field3_index: third field's array index assigned by function
    * numfields: number of fields to use either 2 or 3

    The three index attributes start at -1 until a caller assigns them.
    NOTE(review): -1 as the "unassigned" value is inferred; confirm against
    the code that assigns the indexes.
    """

    def __init__(self, title:str) -> None:
        self.title = title
        self.field1 = ""
        self.field2 = ""
        self.field3 = ""
        # Initialized here so get_json() works on a freshly built object.
        self.field1_index = -1
        self.field2_index = -1
        self.field3_index = -1
        self.numfields = 0

    def get_json(self) -> str:
        """
        Make JSON of the object.

        Returns:
            A compact JSON string (no whitespace between tokens) shaped as
            {"covalue":{"title":"xxxx","field1":field1,"field2":field2,"field3":field3,"field1_index":field1_index,"field2_index":field2_index,"field3_index":field3_index,"numfields":numfields}}
            The index and numfields values are emitted as quoted strings.
            String values are escaped by the json module, so quotes and
            backslashes in a value still yield valid JSON.
        """

        # Function-scope import keeps this block self-contained.
        import json

        # Insertion order of this dict is the key order of the output.
        payload = {
            "title": self.title,
            "field1": self.field1,
            "field2": self.field2,
            "field3": self.field3,
            "field1_index": str(self.field1_index),
            "field2_index": str(self.field2_index),
            "field3_index": str(self.field3_index),
            "numfields": str(self.numfields),
        }
        # Compact separators reproduce the original hand-built layout;
        # ensure_ascii=False keeps non-ASCII characters as typed.
        return json.dumps({"covalue": payload}, separators=(",", ":"), ensure_ascii=False)
121
None __init__(self, str title)
Definition field.py:93
None __init__(self, str title)
Definition field.py:41