229 Builds a LookUpDict from a text file.
231 The file is read and columns are extracted by splitting each line on the delimiter.
232 Empty lines and comment lines (beginning with # or //) are ignored.
233 The first line containing data must hold the delimited field names.
234 num_keys specifies how many columns are used as keys (1-3); the next column is used as the value.
235 At least num_keys+1 columns must be present after splitting.
236 If a line contains a double quote, a more precise (but slower) column separation is used.
237 The LookUpDict will have its fields and recs lists filled.
239 Returns LookUpDict object whose title will start with notok: if there is an error.
251 raise ValueError(
"title is empty")
252 if num_keys<=0
or num_keys>3:
253 raise ValueError(
"num_keys is not 1-3: " + str(num_keys))
255 raise ValueError(
"delim is empty")
256 delim_char= recfuncs.delim_get_char(delim)
258 lkdict.num_keys=num_keys
259 lkdict.is_case_sens=is_case_sens
260 lkdict.file_uri=file_uri
263 with open(file_uri,
"r", encoding=
"utf-8")
as f:
268 if line.endswith(
"\n"):
269 line=line[:len(line)-1]
270 if len(line)>0
and not line.startswith(
"#")
and not line.startswith(
"//"):
273 if line.find(delim_char)<1:
274 raise ValueError(
"first data line does not contain delimiter: " + delim_char)
276 line=line.replace(dq,
'')
277 lkdict.fields= line.split(delim_char)
278 if len(lkdict.fields)< (num_keys+1):
279 raise ValueError(
"too few fields found with delim=" + delim_char +
",#fields=" + len(lkdict.fields)\
280 +
"/" + str(num_keys+1))
283 cols=recfuncs.split_quoted_line(line,delim_char)
284 if len(cols)>0
and cols[0].startswith(
"notok:"):
285 raise RuntimeError(
"error splitting quoted string: " + cols[0][6:])
287 cols=line.split(delim_char)
289 if len(cols)< (num_keys+1):
290 raise ValueError(
"too few fields found at nline=" + str(nline) +
",#fields=" + len(cols)\
291 +
"/" + str(num_keys+1))
298 lkrec.result=cols[num_keys]
301 lkrec.key1= lkrec.key1.lower()
303 lkrec.key2= lkrec.key2.lower()
305 lkrec.key3= lkrec.key3.lower()
307 lkdict.recs.append(lkrec)
309 if lkdict.title.startswith(
"notok:"):
310 raise ValueError(
"Error extracting record keys: " + lkdict.title[6:])
312 except (RuntimeError, ValueError, OSError)
as err:
313 err_msg=
"notok:" + str(err)
322 Builds a LookUpDict from a list that contains what would be read from a file.
324 The list is read and columns are extracted by splitting each line on the delimiter.
325 Empty lines and comment lines (beginning with # or //) are ignored.
326 The first line containing data must hold the delimited field names.
327 num_keys specifies how many columns are used as keys (1-3); the next column is used as the value.
328 At least num_keys+1 columns must be present after splitting.
329 If a line contains a double quote, a more precise (but slower) column separation is used.
330 The LookUpDict will have its fields and recs lists filled.
332 Returns LookUpDict object whose title will start with notok: if there is an error.
345 raise ValueError(
"title is empty")
346 if num_keys<=0
or num_keys>3:
347 raise ValueError(
"num_keys is not 1-3: " + str(num_keys))
349 raise ValueError(
"delim is empty")
350 delim_char= recfuncs.delim_get_char(delim)
353 lkdict.num_keys=num_keys
354 lkdict.is_case_sens=is_case_sens
358 for line
in lkup_list:
362 linein= linein.strip()
363 if linein.endswith(
"\n"):
365 if len(linein)>0
and not linein.startswith(
"#")
and not linein.startswith(
"//"):
368 if linein.find(delim_char)<1:
369 raise ValueError(
"first data line does not contain delimiter: " + delim_char)
370 if linein.find(dq)>-1:
371 linein=linein.replace(dq,
'')
372 lkdict.fields= linein.split(delim_char)
373 if len(lkdict.fields)< (num_keys+1):
374 raise ValueError(
"too few fields found with delim=" + delim_char +
",#fields=" + len(lkdict.fields)\
375 +
"/" + str(num_keys+1))
377 if linein.find(dq)>-1:
378 cols=recfuncs.split_quoted_line(line,delim_char)
379 if len(cols)>0
and cols[0].startswith(
"notok:"):
380 raise RuntimeError(
"error splitting quoted string: " + cols[0][6:])
382 cols=linein.split(delim_char)
384 if len(cols)< (num_keys+1):
385 raise ValueError(
"too few fields found at nline=" + str(nline) +
",#fields=" + len(cols)\
386 +
"/" + str(num_keys+1))
393 lkrec.result=cols[num_keys]
395 lkrec.key1= lkrec.key1.lower()
397 lkrec.key2= lkrec.key2.lower()
399 lkrec.key3= lkrec.key3.lower()
401 lkdict.recs.append(lkrec)
404 except (RuntimeError, ValueError, OSError)
as err:
405 err_msg=
"notok:" + str(err)
414 Extracts lookup key information and parses it into arrays of tokens, wildcard flags,
415 and boolean AND and NOT conditions to accelerate matching.
417 lkdict: LookUpDict object to process
419 Returns modified LookUpDict object with parsed record information. Title will start with notok: if there is an error.
428 front_wild:bool=
False
436 for i
in range(len(lkdict.recs)):
438 nrec= len(new_recs)-1
439 new_recs[nrec].result= lkdict.recs[i].result
440 for j
in range(lkdict.num_keys):
443 key_str= lkdict.recs[i].key1
444 new_recs[nrec].key1=key_str
446 key_str= lkdict.recs[i].key2
447 new_recs[nrec].key2=key_str
449 key_str= lkdict.recs[i].key3
450 new_recs[nrec].key3=key_str
452 while "-and-" in key_str
or "-not-" in key_str:
453 if key_str.startswith(
"-and-")
or key_str.startswith(
"-not-"):
454 if key_str.startswith(
"-and-"):
463 nand= key_str.find(
"-and-")
464 nnot= key_str.find(
"-not-")
465 if nand>0
and (nnot<0
or nand<nnot):
466 str_temp= key_str[:nand]
467 key_str= key_str[nand:]
468 elif nnot>0
and (nand<0
or nnot<nand):
469 str_temp= key_str[:nnot]
470 key_str= key_str[nnot:]
475 if key_str_typ==
"and":
477 new_recs[i].key1_and.append(str_temp)
478 nkeyand=len(new_recs[i].key1_and)-1
479 new_recs[i].key1_and_front_wild.append(
False)
480 new_recs[i].key1_and_back_wild.append(
False)
482 new_recs[i].key2_and.append(str_temp)
483 nkeyand=len(new_recs[i].key2_and)-1
484 new_recs[i].key2_and_front_wild.append(
False)
485 new_recs[i].key2_and_back_wild.append(
False)
487 new_recs[i].key3_and.append(str_temp)
488 nkeyand=len(new_recs[i].key3_and)-1
489 new_recs[i].key3_and_front_wild.append(
False)
490 new_recs[i].key3_and_back_wild.append(
False)
491 elif key_str_typ==
"not":
493 new_recs[i].key1_not.append(str_temp)
494 nkeynot=len(new_recs[i].key1_not)-1
495 new_recs[i].key1_not_front_wild.append(
False)
496 new_recs[i].key1_not_back_wild.append(
False)
498 new_recs[i].key2_not.append(str_temp)
499 nkeynot=len(new_recs[i].key2_not)-1
500 new_recs[i].key2_not_front_wild.append(
False)
501 new_recs[i].key2_not_back_wild.append(
False)
503 new_recs[i].key3_not.append(str_temp)
504 nkeynot=len(new_recs[i].key3_not)-1
505 new_recs[i].key3_not_front_wild.append(
False)
506 new_recs[i].key3_not_back_wild.append(
False)
510 new_recs[i].key1_and.append(key_str)
511 nkeyand=len(new_recs[i].key1_and)-1
512 new_recs[i].key1_and_front_wild.append(
False)
513 new_recs[i].key1_and_back_wild.append(
False)
515 new_recs[i].key2_and.append(key_str)
516 nkeyand=len(new_recs[i].key2_and)-1
517 new_recs[i].key2_and_front_wild.append(
False)
518 new_recs[i].key2_and_back_wild.append(
False)
520 new_recs[i].key3_and.append(key_str)
521 nkeyand=len(new_recs[i].key3_and)-1
522 new_recs[i].key3_and_front_wild.append(
False)
523 new_recs[i].key3_and_back_wild.append(
False)
528 temp_list=new_recs[i].key1_and
530 temp_list=new_recs[i].key2_and
532 temp_list=new_recs[i].key3_and
543 elif s.startswith(
"*"):
548 elif key_str.endswith(
"*"):
549 key_str=key_str[:len(key_str)-1]
551 elif s.endswith(
"*"):
558 new_recs[i].key1_and[nkeyand]=key_str
559 new_recs[i].key1_and_front_wild[nkeyand]=front_wild
560 new_recs[i].key1_and_back_wild[nkeyand]=back_wild
562 new_recs[i].key2_and[nkeyand]=key_str
563 new_recs[i].key2_and_front_wild[nkeyand]=front_wild
564 new_recs[i].key2_and_back_wild[nkeyand]=back_wild
566 new_recs[i].key3_and[nkeyand]=key_str
567 new_recs[i].key3_and_front_wild[nkeyand]=front_wild
568 new_recs[i].key3_and_back_wild[nkeyand]=back_wild
572 temp_list=new_recs[i].key1_not
574 temp_list=new_recs[i].key2_not
576 temp_list=new_recs[i].key3_not
587 elif s.startswith(
"*"):
592 elif key_str.endswith(
"*"):
595 elif s.endswith(
"*"):
602 new_recs[i].key1_not[nkeynot]=key_str
603 new_recs[i].key1_not_front_wild[nkeynot]=front_wild
604 new_recs[i].key1_not_back_wild[nkeynot]=back_wild
606 new_recs[i].key2_not[nkeynot]=key_str
607 new_recs[i].key2_not_front_wild[nkeynot]=front_wild
608 new_recs[i].key2_not_back_wild[nkeynot]=back_wild
610 new_recs[i].key3_not[nkeynot]=key_str
611 new_recs[i].key3_not_front_wild[nkeynot]=front_wild
612 new_recs[i].key3_not_back_wild[nkeynot]=back_wild
616 lkdict.recs.append(rec)
618 except (RuntimeError, ValueError, OSError)
as err:
619 err_msg=
"notok:" + str(err)