64 """Reads report file to parse into
65 QualityAnalysis object report. This should be a report written by VerityPy.
67 file_uri: string URI to local or Cloud accessible file
69 returns QualityAnalysis object. Its status property will start with notok: if there is an error
73 report: qualityanalysis.QualityAnalysis= qualityanalysis.QualityAnalysis()
92 raise ValueError(
"no fileURI")
93 elif len(file_uri.strip())==0:
94 raise ValueError(
"fileURI is empty")
95 with open(file_uri,
"r", encoding=
"utf-8")
as f:
99 if linein.endswith(
"\r\n"):
101 if linein.endswith(
"\n")
or linein.endswith(
"\r"):
106 elif linein.startswith(
"#")
or linein.startswith(
"//"):
108 elif linein
in (LSQUARE,RSQUARE,LCURLY,RCURLY):
112 linein=linein.replace(DQ,
"")
113 if linein.startswith(COMMA):
115 if linein.endswith(COMMA):
117 if linein.startswith(LCURLY):
119 if linein.endswith(RCURLY):
126 lineinlc= linein.lower()
128 if not "report:[" in linein:
129 raise ValueError(
"first line does not have required report:[ object name")
132 elif "err_stats:[" in lineinlc
or "errstats:[" in lineinlc:
137 elif "fields_err_datatype:[" in lineinlc
or "fieldserrdatatype:[" in lineinlc:
141 elif "fields_err_fmt:[" in lineinlc
or "fieldserrfmt:[" in lineinlc
or "fieldserrformat:[" in lineinlc:
145 elif "fields:[" in lineinlc:
147 elif "field_datatypes:[" in lineinlc
or "fielddatatypes:[" in lineinlc:
148 cursect=
"fielddatatypes"
150 elif "field_formats:[" in lineinlc
or "fieldformats:[" in lineinlc:
151 cursect=
"fieldformats"
153 elif "field_quality:[" in lineinlc
or "fieldquality:[" in lineinlc:
154 cursect=
"fieldquality"
156 elif "field_datatype_dists:[" in lineinlc
or "fielddatatypedists:[" in lineinlc:
157 cursect=
"fielddatatypedists"
159 elif "field_uniqvalues:[" in lineinlc
or "fielduniqvalues:[" in lineinlc:
160 cursect=
"fielduniqvalues"
162 elif "field_spchar_dists:[" in lineinlc
or "fieldspchardists:[" in lineinlc
or "fieldspecchardists:[" in lineinlc:
163 cursect=
"fieldspchardists"
165 elif "spec_char_dists:[" in lineinlc
or "specchardists:[" in lineinlc:
166 cursect=
"spchardists"
167 elif "spec_char_examples:[" in lineinlc
or "speccharexamples:[" in lineinlc:
168 cursect=
"spcharexamples"
169 elif "err_datatype_examples:[" in lineinlc
or "errdatatypeexamples:[" in lineinlc:
170 cursect=
"errdatatypeexamples"
171 elif "err_fmt_examples:[" in lineinlc
or "errfmtexamples:[" in lineinlc:
172 cursect=
"errfmtexamples"
173 elif "rec_size_dist:[" in lineinlc
or "recsizedist:[" in lineinlc:
174 cursect=
"recsizedist"
175 elif "rec_parse_dist:[" in lineinlc
or "recparsedist:[" in lineinlc:
176 cursect=
"recparsedist"
177 elif "rec_parse_errs:[" in lineinlc
or "recparseerrs:[" in lineinlc
or "recparseerrors:[" in lineinlc:
178 cursect=
"recparseerrs"
179 elif "rec_parse_errs_examples:[" in lineinlc
or "recparseerrsexamples:[" in lineinlc:
180 cursect=
"recparseerrsexamples"
181 elif "covalues:[" in lineinlc:
183 elif "covalue_uniqvalues:[" in lineinlc
or "covalueuniqvalues:[" in lineinlc:
184 cursect=
"covalueuniqvalues"
186 elif "reasons:[" in lineinlc:
187 if cursect==
"errstat":
189 raise ValueError(
"found errstat reasons:[ but for unknown child section at nline=" + str(nline))
191 raise ValueError(
"found errstat." + cursect2 +
" reasons:[ but for unknown field at nline=" + str(nline))
194 raise ValueError(
"found reasons:[ but for unexpected location at nline=" + str(nline))
195 elif cursect==
"report":
197 elem= linein[:linein.find(
":")].lower()
198 elemval= linein[(linein.find(
":")+1):]
200 report.title= elemval
202 report.status= elemval
203 elif elem==
"numrecs":
204 report.numrecs = numfuncs.is_int_get(elemval,
"number",
False)
206 report.maxuv = numfuncs.is_int_get(elemval,
"number",
False)
208 report.delim= elemval
209 elif elem
in (
"delim_char",
"delimchar"):
210 report.delim_char= elemval
211 elif elem
in (
"is_case_sens",
"iscasesens"):
212 report.is_case_sens =
True if elemval==
"true" else False
213 elif elem
in (
"is_quoted",
"isquoted"):
214 report.is_quoted =
True if elemval==
"true" else False
215 elif elem
in (
"has_header",
"hasheader"):
216 report.has_header =
True if elemval==
"true" else False
217 elif elem
in (
"extract_fields",
"extractfields"):
218 report.extract_fields =
True if elemval==
"true" else False
219 elif cursect==
"fields":
221 if elemval.lower()
in report.hash_fields:
222 raise ValueError(
"duplicate field in fields section: " + elemval)
223 report.fields.append(field.Field(elemval))
224 report.hash_fields[elemval.lower()]= len(report.fields)-1
225 report.field_names_lower.append(elemval.lower())
226 report.field_datatype_dist.append({
"int":0,
"real":0,
"bool":0,
"date":0,
"string":0,
"empty":0})
227 report.field_uniqvals.append([])
228 report.field_quality.append(
"")
229 report.spec_char_dist_field.append({})
230 elif cursect==
"fielddatatypes":
233 if nitem< len(report.fields):
234 report.fields[nitem].datatype= elemval
235 elif cursect==
"fieldformats":
238 if nitem< len(report.fields):
240 if "strcase:" in txt:
241 txt1= txt[(txt.find(
"strcase:")+8):]
243 txt1=txt1[:txt1.find(COMMA)]
244 report.fields[nitem].fmt_strcase=txt1
246 txt1= txt[(txt.find(
"strlen:")+7):]
248 txt1=txt1[:txt1.find(COMMA)]
249 if numfuncs.is_int(txt1):
250 report.fields[nitem].fmt_strlen= int(txt1)
251 if "decimal:" in txt:
252 txt1= txt[(txt.find(
"decimal:")+8):]
254 txt1=txt1[:txt1.find(COMMA)]
255 if numfuncs.is_int(txt1):
256 report.fields[nitem].fmt_decimal= int(txt1)
258 txt1= txt[(txt.find(
"date:")+5):]
260 txt1=txt1[:txt1.find(COMMA)]
261 report.fields[nitem].fmt_date=txt1
263 txt1= txt[(txt.find(
"strcut:")+7):]
265 txt1=txt1[:txt1.find(COMMA)]
266 report.fields[nitem].fmt_strcut=txt1
268 txt1= txt[(txt.find(
"strpad:")+7):]
270 txt1=txt1[:txt1.find(COMMA)]
271 report.fields[nitem].fmt_strpad=txt1
272 if "strpadchar:" in txt:
273 txt1= txt[(txt.find(
"strpadchar:")+11):]
275 txt1=txt1[:txt1.find(COMMA)]
276 report.fields[nitem].fmt_strpadchar=txt1
277 elif cursect==
"fieldquality":
280 if nitem< len(report.fields):
281 report.field_quality[nitem]= elemval
282 elif cursect==
"fielddatatypedists":
285 if nitem< len(report.fields):
288 txt1= txt[(txt.find(
"int:")+4):]
290 txt1=txt1[:txt1.find(COMMA)]
291 if numfuncs.is_int(txt1):
292 report.field_datatype_dist[nitem][
"int"]= int(txt1)
294 txt1= txt[(txt.find(
"real:")+5):]
296 txt1=txt1[:txt1.find(COMMA)]
297 if numfuncs.is_int(txt1):
298 report.field_datatype_dist[nitem][
"real"]= int(txt1)
300 txt1= txt[(txt.find(
"bool:")+5):]
302 txt1=txt1[:txt1.find(COMMA)]
303 if numfuncs.is_int(txt1):
304 report.field_datatype_dist[nitem][
"bool"]= int(txt1)
306 txt1= txt[(txt.find(
"date:")+5):]
308 txt1=txt1[:txt1.find(COMMA)]
309 if numfuncs.is_int(txt1):
310 report.field_datatype_dist[nitem][
"date"]= int(txt1)
312 txt1= txt[(txt.find(
"string:")+7):]
314 txt1=txt1[:txt1.find(COMMA)]
315 if numfuncs.is_int(txt1):
316 report.field_datatype_dist[nitem][
"string"]= int(txt1)
318 txt1= txt[(txt.find(
"empty:")+6):]
320 txt1=txt1[:txt1.find(COMMA)]
321 if numfuncs.is_int(txt1):
322 report.field_datatype_dist[nitem][
"empty"]= int(txt1)
323 elif cursect==
"fielduniqvalues":
325 if "field:" in elemval:
326 curfld= elemval[(elemval.find(
":")+1):].lower()
327 if not curfld
in report.hash_fields:
328 raise ValueError(
"fielduniqvalues has unknown field: " + curfld +
" at nline=" + str(nline))
329 elif "uniqvalues:[" in elemval.lower():
332 elif len(curfld)>0
and "uniqvalue:" in elemval
and ",count:" in elemval:
333 elemval= elemval[(elemval.find(
"uniqvalue:") + 10):]
334 elem= elemval[:elemval.find(
",count:")]
335 elemval= elemval[(elemval.find(
",count:")+7):]
336 nitem= report.hash_fields[curfld]
337 if (nval := numfuncs.is_int_get(elemval,
"number",
False)) <0:
339 report.field_uniqvals[nitem].append((elem,nval))
340 elif cursect==
"covalues":
341 if linein.startswith(LCURLY):
343 if linein.endswith(RCURLY):
345 if "title:" in linein:
346 elemval= linein[(linein.find(
"title:") + 6):]
347 if ",field1:" in elemval:
348 elemval= elemval[:elemval.find(
",field1:")]
349 report.covalues.append(field.CoValue(elemval))
350 nitem= len(report.covalues)-1
351 hash_covalues[elemval.lower()]= nitem
352 report.covalue_uniqvals.append([])
353 if ",field1:" in linein:
354 elemval= linein[(linein.find(
",field1:") + 8):]
355 if ",field2:" in elemval:
356 elemval= elemval[:elemval.find(
",field2:")]
357 report.covalues[nitem].field1=elemval
358 if ",field2:" in linein:
359 elemval= linein[(linein.find(
",field2:") + 8):]
360 if ",field3:" in elemval:
361 elemval= elemval[:elemval.find(
",field3:")]
362 report.covalues[nitem].field2=elemval
363 if ",field3:" in linein:
364 elemval= linein[(linein.find(
",field3:") + 8):]
365 if ",field1_index:" in elemval:
366 elemval= elemval[:elemval.find(
",field1_index:")]
367 report.covalues[nitem].field3=elemval
368 if ",field1_index:" in linein:
369 elemval= linein[(linein.find(
",field1_index:") + 14):]
370 if ",field2_index:" in elemval:
371 elemval= elemval[:elemval.find(
",field2_index:")]
372 report.covalues[nitem].field1_index= n1
if (n1 := numfuncs.is_int_get(elemval,
"number",
False)) >=0
else -1
373 if ",field2_index:" in linein:
374 elemval= linein[(linein.find(
",field2_index:") + 14):]
375 if ",field3_index:" in elemval:
376 elemval= elemval[:elemval.find(
",field3_index:")]
377 report.covalues[nitem].field2_index= n1
if (n1 := numfuncs.is_int_get(elemval,
"number",
False)) >=0
else -1
378 if ",field3_index:" in linein:
379 elemval= linein[(linein.find(
",field3_index:") + 14):]
380 if ",numfields:" in elemval:
381 elemval= elemval[:elemval.find(
",numfields:")]
382 report.covalues[nitem].field3_index= n1
if (n1 := numfuncs.is_int_get(elemval,
"number",
False)) >=0
else -1
383 if ",numfields:" in linein:
384 elemval= linein[(linein.find(
",numfields:") + 11):]
385 report.covalues[nitem].numfields= n1
if (n1 := numfuncs.is_int_get(elemval,
"number",
False)) >0
else 0
386 elif cursect==
"covalueuniqvalues":
388 if "covalue:" in lineinlc:
389 curfld= elemval[(elemval.find(
":")+1):].lower()
390 if curfld
not in hash_covalues:
391 raise ValueError(
"coValue from its uniqueValues is not known: " + curfld +
" at nline=" + str(nline))
392 elif "uniqvalues:[" in lineinlc:
395 elif len(curfld)>0
and "uniqvalue:" in lineinlc
and ",count:" in elemval:
396 elemval= elemval[(lineinlc.find(
"uniqvalue:") + 10):]
397 elem= elemval[:elemval.find(
",count:")]
398 elemval= elemval[(elemval.find(
",count:")+7):]
399 nitem= hash_covalues[curfld]
400 if (nval := numfuncs.is_int_get(elemval,
"number",
False)) <0:
402 report.covalue_uniqvals[nitem].append((elem,nval))
403 elif cursect==
"fieldspchardists":
405 if "field:" in elemval:
406 curfld= elemval[elemval.find(
"field:")+6:].lower()
407 if curfld.endswith(
":["):
409 if not curfld
in report.hash_fields:
410 raise ValueError(
"fieldspchardists has unknown field: " + curfld +
" at nline=" + str(nline))
411 elif ":[" in elemval:
412 curfld= elemval[:elemval.find(
":[")].lower()
413 if not curfld
in report.hash_fields:
414 raise ValueError(
"fieldspchardists has unknown field: " + curfld +
" at nline=" + str(nline))
417 elem= elemval[:elemval.find(
":")]
418 elemval= elemval[(elemval.find(
":")+1):]
419 nitem= report.hash_fields[curfld]
420 if (nval := numfuncs.is_int_get(elemval,
"number",
False)) <0:
422 report.spec_char_dist_field[nitem][elem]=nval
423 elif cursect==
"spchardists":
426 elem= elemval[:elemval.find(
":")]
427 elemval= elemval[(elemval.find(
":")+1):]
428 if (nval := numfuncs.is_int_get(elemval,
"number",
False)) <0:
430 report.spec_char_dist[elem]=nval
431 elif cursect==
"spcharexamples":
432 if "example:" in linein:
433 elemval= linein[(linein.find(
"example:")+8):]
435 if ",rec:" in elemval:
436 txt= elemval[(elemval.find(
",rec:")+5):]
437 elemval= elemval[:elemval.find(
",rec:")]
438 elemval=elemval.replace(
"{",
"").replace(
"}",
"")
439 report.spec_char_examples.append(elemval + txt)
440 elif cursect==
"errdatatypeexamples":
441 if "nline:" in linein:
442 elemval= linein[(linein.find(
"nline:")+6):]
444 if ",rec:" in elemval:
445 txt= elemval[(elemval.find(
",rec:")+5):]
446 elemval = elemval[:elemval.find(
",rec:")]
447 elemval =
"(" + elemval +
")" + txt
448 report.err_datatype_examples.append(elemval)
449 elif cursect==
"errfmtexamples":
450 elemval= linein[(linein.find(
"nline:")+6):]
452 if ",rec:" in elemval:
453 txt= elemval[(elemval.find(
",rec:")+5):]
454 elemval = elemval[:elemval.find(
",rec:")]
455 elemval =
"(" + elemval +
")" + txt
456 report.err_fmt_examples.append(elemval)
457 elif cursect==
"errstat":
459 elem= linein[:linein.find(
":")].lower()
460 elemval= linein[(linein.find(
":")+1):]
461 if elem
in [
"numrecs_err",
"numrecserr"]:
462 report.err_stats[
"numrecs_err"]= numfuncs.is_int_get(elemval,
"number",
False)
463 elif elem
in [
"numrecs_err_datatype",
"numrecserrdatatype"]:
464 report.err_stats[
"numrecs_err_datatype"]= numfuncs.is_int_get(elemval,
"number",
False)
465 elif elem
in [
"numrecs_err_fmt",
"numrecserrfmt"]:
466 report.err_stats[
"numrecs_err_fmt"]= numfuncs.is_int_get(elemval,
"number",
False)
469 raise ValueError(
"found field in errstat for unknown type at nline=" + str(nline))
470 if ",count:" in elemval:
471 txt= elemval[(elemval.find(
":")+1):]
472 if numfuncs.is_int(txt):
475 raise ValueError(
"no count for field in errstat." + cursect2 +
" at nline=" + str(nline))
476 curfld= elemval[:elemval.find(
",count:")]
478 raise ValueError(
"no ,count: part for field in errstat." + cursect2 +
" at nline=" + str(nline))
479 if cursect2==
"flderrdt":
480 report.err_stats[
"fields_err_datatype"][curfld]={}
481 report.err_stats[
"fields_err_datatype"][curfld][
"count"]=n1
482 report.err_stats[
"fields_err_datatype"][curfld][
"reasons"]={}
483 elif cursect2==
"flderrfmt":
484 report.err_stats[
"fields_err_fmt"][curfld]={}
485 report.err_stats[
"fields_err_fmt"][curfld][
"count"]=n1
486 report.err_stats[
"fields_err_fmt"][curfld][
"reasons"]={}
487 elif elem==
"reason" and cursect3==
"reason":
489 raise ValueError(
"found reason in errstat for unknown type at nline=" + str(nline))
491 raise ValueError(
"found reason in errstat for unknown field at nline=" + str(nline))
492 if ",count:" in elemval:
493 txt= elemval[(elemval.find(
":")+1):]
494 if numfuncs.is_int(txt):
497 raise ValueError(
"no count for reason in errstat." + cursect2 +
" at nline=" + str(nline))
498 reason= elemval[:elemval.find(
",count:")]
500 raise ValueError(
"no ,count: part for field in errstat." + cursect2 +
" at nline=" + str(nline))
501 if cursect2==
"flderrdt":
502 report.err_stats[
"fields_err_datatype"][curfld][
"reasons"][reason]=n1
503 elif cursect2==
"flderrfmt":
504 report.err_stats[
"fields_err_fmt"][curfld][
"reasons"][reason]=n1
505 elif cursect==
"recsizedist":
508 elem= elemval[:elemval.find(
":")]
509 elemval=elemval[(elemval.find(
":")+1):]
510 if (nval := numfuncs.is_int_get(elemval,
"number",
False)) <0:
512 report.rec_size_dist[elem]= nval
513 elif cursect==
"recparsedist":
516 elem= elemval[:elemval.find(
":")]
517 elemval=elemval[(elemval.find(
":")+1):]
518 if (nval := numfuncs.is_int_get(elemval,
"number",
False)) <0:
520 report.rec_parse_dist[elem]= nval
521 elif cursect==
"recparseerrs":
524 elem= elemval[:elemval.find(
":")].lower()
525 elemval=elemval[(elemval.find(
":")+1):]
526 if (nval := numfuncs.is_int_get(elemval,
"number",
False)) <0:
529 report.rec_parse_errs[
"small1"]= nval
530 elif "small2" in elem:
531 report.rec_parse_errs[
"small2"]= nval
533 report.rec_parse_errs[
"big"]= nval
534 elif cursect==
"recparseerrsexamples":
535 if "small1:[" in lineinlc:
537 elif "small2:[" in lineinlc:
539 elif "big:[" in lineinlc:
541 elif "record:" in lineinlc:
542 elemval=linein[(linein.find(
":")+1):]
543 report.rec_parse_errs[cursect2 +
"_recs"].append(elemval)
544 except RuntimeError
as rte:
545 print(
"runtime error: {0}", str(rte))
546 report.status=
"notok:" + str(rte)
547 except OSError
as ose:
548 print(
"OS error: {0}", str(ose))
549 report.status=
"notok:" + str(ose)
550 except ValueError
as ve:
551 print(
"value error: {0}", str(ve))
552 report.status=
"notok:" + str(ve)