@staticmethod
def get_imgt_position_weights_for_implanting(aa_input_length: int, region_type: RegionType,
                                             sequence_position_weights: dict, limit: int):
    """
    Compute normalized per-position weights for implanting a motif of length `limit`.

    The annotation weights are obtained from
    PositionHelper.get_imgt_position_weights_for_annotation, then every position whose
    enumeration index is greater than `aa_input_length - limit` gets weight zero, and the
    remaining weights are rescaled so that they sum to 1.

    Args:
        aa_input_length: length of the amino acid sequence (used in the cut-off and in log messages)
        region_type: region type forwarded to the annotation-weights helper
        sequence_position_weights: position weights forwarded to the annotation-weights helper
        limit: motif length; start indices beyond `aa_input_length - limit` are disallowed

    Returns:
        dict mapping position to weight. If all weights sum to zero after the cut-off, a
        warning is logged and the weights are returned as they are (not normalized); otherwise
        the values are np.float64 and sum to 1.
    """
    weights = PositionHelper.get_imgt_position_weights_for_annotation(aa_input_length, region_type,
                                                                      sequence_position_weights)

    # NOTE(review): assumes the key order of the returned dict follows the sequence order,
    # so that the enumeration index equals the start index of the motif -- confirm against helper.
    last_allowed_index = aa_input_length - limit
    for idx, imgt_position in enumerate(weights.keys()):
        if idx > last_allowed_index:
            weights[imgt_position] = 0.

    total = sum(list(weights.values()))

    if total == 0:
        logging.warning(f"Sequence of length {aa_input_length} has no allowed positions for signal with sequence "
                        f"position weights {sequence_position_weights} and motif length {limit}, it will be discarded.")
        return weights

    # Convert each weight to float64 before dividing, exactly as the annotation weights require.
    normalized = {}
    for imgt_position, weight in weights.items():
        normalized[imgt_position] = np.array([weight]).astype(np.float64)[0] / total

    assert np.isclose(sum(list(normalized.values())), 1.), \
        (aa_input_length, region_type.name, normalized, sum(list(normalized.values())), limit)

    return normalized
@staticmethod
def gen_imgt_positions_from_cdr3_length(input_length: int) -> list:
    """
    Generate the list of IMGT position labels (as strings) for a CDR3 of the given length.

    Lengths between PositionHelper.MIN_CDR3_LEN and PositionHelper.MIDPOINT_CDR3_LEN (inclusive)
    always start with 105-107 and end with 116-117; the extra positions are added upwards from
    108 on the left side and downwards from 115 on the right side, with the right side
    receiving the extra position when the count is odd.

    Lengths above the midpoint and up to PositionHelper.MAX_CDR3_LEN contain all positions
    105-117 plus inserted labels '111.1', '111.2', ... (ascending) followed by
    ..., '112.2', '112.1' (descending) between 111 and 112; the 112.x side receives the
    extra label when the count is odd.

    Args:
        input_length: length of the CDR3 sequence

    Returns:
        list of position labels as strings, or an empty list (after logging a warning) if the
        length is outside the supported range.
    """
    min_len = PositionHelper.MIN_CDR3_LEN
    mid_len = PositionHelper.MIDPOINT_CDR3_LEN

    if min_len <= input_length <= mid_len:
        extra = input_length - min_len
        left_count = extra // 2
        right_count = extra - left_count

        labels = ['105', '106', '107']
        labels.extend(str(number) for number in range(108, 108 + left_count))
        labels.extend(str(number) for number in range(116 - right_count, 116))
        labels.extend(['116', '117'])
        return labels

    if mid_len < input_length <= PositionHelper.MAX_CDR3_LEN:
        extra = input_length - mid_len
        count_111 = extra // 2
        count_112 = extra - count_111

        before_insertions = [str(number) for number in range(105, 112)]
        inserted_111 = [f'111.{k}' for k in range(1, count_111 + 1)]
        # 112.x labels are listed in descending order so that 112.1 sits next to 112
        inserted_112 = [f'112.{k}' for k in range(count_112, 0, -1)]
        after_insertions = [str(number) for number in range(112, 118)]
        return before_insertions + inserted_111 + inserted_112 + after_insertions

    logging.warning(f"IMGT positions could not be generated for CDR3 sequence of length {input_length}.")
    return []
@staticmethod
def gen_imgt_positions_from_junction_length(input_length: int):
    """
    Generate IMGT position labels (as strings) for an IMGT junction of the given length.

    The junction is handled as a CDR3 of length `input_length - 2` with position '104'
    prepended and position '118' appended.

    Args:
        input_length: length of the junction sequence

    Returns:
        list of position labels as strings, or an empty list (after logging a warning) if
        `input_length` is not within [MIN_CDR3_LEN + 2, MAX_CDR3_LEN + 2].
    """
    supported = PositionHelper.MIN_CDR3_LEN + 2 <= input_length <= PositionHelper.MAX_CDR3_LEN + 2

    if not supported:
        logging.warning(f"IMGT positions could not be generated for IMGT junction sequence of length {input_length}.")
        return []

    cdr3_positions = PositionHelper.gen_imgt_positions_from_cdr3_length(input_length - 2)
    return ['104'] + cdr3_positions + ['118']
@staticmethod
def gen_imgt_positions_from_sequence(sequence: ReceptorSequence,
                                     sequence_type: SequenceType = SequenceType.AMINO_ACID,
                                     region_type: RegionType = RegionType.IMGT_CDR3):
    """
    Generate IMGT position labels for a receptor sequence based on its length.

    Args:
        sequence: object providing `get_sequence(sequence_type=...)`; only the length of the
            returned value is used
        sequence_type: must be SequenceType.AMINO_ACID; any other value is rejected
        region_type: region type forwarded to PositionHelper.gen_imgt_positions_from_length

    Returns:
        whatever PositionHelper.gen_imgt_positions_from_length returns for the sequence length
        and region type.

    Raises:
        NotImplementedError: if `sequence_type` is not SequenceType.AMINO_ACID.
    """
    if sequence_type != SequenceType.AMINO_ACID:
        raise NotImplementedError(f"{sequence_type.name} is currently not supported for obtaining IMGT positions")

    raw_sequence = sequence.get_sequence(sequence_type=sequence_type)
    sequence_length = len(raw_sequence)

    return PositionHelper.gen_imgt_positions_from_length(sequence_length, region_type)
@staticmethod
def gen_imgt_positions_from_length(input_length: int, region_type: RegionType):
    """
    Dispatch IMGT position generation to the generator matching the region type.

    Args:
        input_length: length of the sequence for which positions are generated
        region_type: RegionType.IMGT_CDR3 or RegionType.IMGT_JUNCTION

    Returns:
        the result of gen_imgt_positions_from_cdr3_length for IMGT_CDR3, or of
        gen_imgt_positions_from_junction_length for IMGT_JUNCTION.

    Raises:
        NotImplementedError: for any other region type.
    """
    if region_type == RegionType.IMGT_CDR3:
        positions = PositionHelper.gen_imgt_positions_from_cdr3_length(input_length)
        return positions

    if region_type == RegionType.IMGT_JUNCTION:
        positions = PositionHelper.gen_imgt_positions_from_junction_length(input_length)
        return positions

    raise NotImplementedError(f"PositionHelper: IMGT positions are not implemented for region type {region_type}")