How To Calculate Frequency Of Elements For Pairwise Comparisons Of Lists In Python?
I have the sample stored in the following list: sample = ['AAAA', 'CGCG', 'TTTT', 'AT-T', 'CATC']. To illustrate the problem, I have denoted them as 'Sets' below Set1 AAAA Set2 CGCG Set3
Solution 1:
I think this is what you want:
from collections import Counter
# Pairwise comparison of the sequences in `sample` (a list of equal-style
# strings over the alphabet A/T/C/G with '-' marking a gap).  This version
# deliberately uses explicit loops instead of comprehensions.

# Remove elements where all nucleobases are the same.
# Iterate backwards so deleting an item does not shift the indices that
# are still to be visited.
for index in range(len(sample) - 1, -1, -1):
    if sample[index][:1] * len(sample[index]) == sample[index]:
        del sample[index]

for indexA, setA in enumerate(sample):
    for indexB, setB in enumerate(sample):
        # Don't compare samples with themselves nor compare same pair twice.
        if indexA <= indexB:
            continue

        # Calculate number of unique pairs, ignoring any position where
        # either sequence has a gap ('-').
        pair_count = Counter()
        for pair in zip(setA, setB):
            if '-' not in pair:
                pair_count[pair] += 1

        # Only analyse pairs of sets with 2 unique pairs.
        if len(pair_count) != 2:
            continue

        # Count individual bases across both sequences of the pair.
        base_counter = Counter()
        for pair, count in pair_count.items():
            base_counter[pair[0]] += count
            base_counter[pair[1]] += count

        # Number of gap-free positions, i.e. the length of one sequence
        # after the '-' columns have been dropped.
        sequence_length = sum(pair_count.values())

        # Convert counts to frequencies.  Counts come from both sequences
        # but are divided by the length of one, so the values sum to 2.
        base_freq = {}
        for base, count in base_counter.items():
            base_freq[base] = count / float(sequence_length)

        # Examine a pair from the two unique pairs to calculate float_a:
        # observed pair frequency minus the product of its base frequencies.
        pair = list(pair_count)[0]
        float_a = (
            (pair_count[pair] / float(sequence_length))
            - base_freq[pair[0]] * base_freq[pair[1]]
        )

        # Step 7!
        # NOTE: a base missing from base_freq contributes 0, which makes the
        # product 0 and this division raise ZeroDivisionError.
        float_b = float_a / float(
            base_freq.get('A', 0)
            * base_freq.get('T', 0)
            * base_freq.get('C', 0)
            * base_freq.get('G', 0)
        )
        # float_a / float_b are recomputed for every qualifying pair; store
        # or print them here if they are needed afterwards.
Or, more Pythonically (with the list/dict comprehensions you don't want):
from collections import Counter
# Pairwise comparison of the sequences in `sample`, written with
# list/dict comprehensions.
BASES = 'ATCG'

# Remove elements where all nucleobases are the same.
sample = [item for item in sample if item[:1] * len(item) != item]

for indexA, setA in enumerate(sample):
    for indexB, setB in enumerate(sample):
        # Don't compare samples with themselves nor compare same pair twice.
        if indexA <= indexB:
            continue

        # Calculate number of unique pairs, skipping positions where either
        # sequence has a gap ('-').
        relevant_pairs = [
            (elA, elB)
            for (elA, elB) in zip(setA, setB)
            if elA != '-' and elB != '-'
        ]
        pair_count = Counter(relevant_pairs)

        # Only analyse pairs of sets with 2 unique pairs.
        if len(pair_count) != 2:
            continue

        # Both sequences as tuples with pairs involving '-' removed.
        # Use fresh names: rebinding setA here would replace the outer loop
        # variable and corrupt every later comparison against it.
        filteredA, filteredB = zip(*relevant_pairs)

        # Number of gap-free positions in the compared pair.
        seq_length = len(filteredA)

        # Convert counts to frequencies.  Counts come from both sequences
        # but are divided by the length of one, so the values sum to 2.
        base_freq = {
            base: count / float(seq_length)
            for (base, count) in (Counter(filteredA) + Counter(filteredB)).items()
        }

        # Examine a pair from the two unique pairs to calculate float_a:
        # observed pair frequency minus the product of its base frequencies.
        pair = list(pair_count)[0]
        float_a = (
            (pair_count[pair] / float(seq_length))
            - base_freq[pair[0]] * base_freq[pair[1]]
        )

        # Step 7!
        # NOTE: a base missing from base_freq contributes 0, which makes the
        # denominator 0 and the division below raise ZeroDivisionError.
        denominator = 1
        for base in BASES:
            denominator *= base_freq.get(base, 0)
        float_b = float_a / denominator
        # float_a / float_b are recomputed for every qualifying pair; store
        # or print them here if they are needed afterwards.
Post a Comment for "How To Calculate Frequency Of Elements For Pairwise Comparisons Of Lists In Python?"