The AI-generated translations are of really good quality. Much of the text looks to be based on Nanamoli's translations, and much of it is correct, although I'm sure there will be errors.
What is super cool about it is the navigation.
For example, I used the AI to help me write code in Python, a language I don't even know, to map the DPR (Digital Pali Reader) archaic sutta numbering system to modern sutta numbers and tag the text with them.
It's not an easy or quick thing to do, even if you're a programming expert AND knowledgeable about the differences between the two numbering systems.
With AI, I was able to tag the modern sutta citations in a few hours of work coaxing and coaching the AI to write the code for me.
Python program to append a modern sutta ref. number next to DPR pāḷi source with archaic numbering
import re
def create_authoritative_maps():
    """Build the two lookup tables used to convert DPR headings to SN numbers.

    Returns:
        A ``(name_to_num, toc)`` tuple where

        * ``name_to_num`` maps a saṃyutta heading name exactly as it is
          spelled in the source text (lowercase, ending in ``saṃyuttaṃ``)
          to its modern Saṃyutta Nikāya number (1-56);
        * ``toc`` maps each saṃyutta number to a list holding the number of
          suttas in each of its vaggas, in order.  Summing the first ``k``
          entries gives the offset to add to a vagga-local sutta number in
          vagga ``k + 1``.

    The tables are hand-maintained; verify them against a canonical index
    before relying on the generated citations.
    """
    name_to_num = {
        'devatāsaṃyuttaṃ': 1, 'devaputtasaṃyuttaṃ': 2, 'kosalasaṃyuttaṃ': 3,
        'mārasaṃyuttaṃ': 4, 'bhikkhunīsaṃyuttaṃ': 5, 'brahmasaṃyuttaṃ': 6,
        'brāhmaṇasaṃyuttaṃ': 7, 'vaṅgīsasaṃyuttaṃ': 8, 'vanasaṃyuttaṃ': 9,
        'yakkhasaṃyuttaṃ': 10, 'sakkasaṃyuttaṃ': 11, 'nidānasaṃyuttaṃ': 12,
        'abhisamayasaṃyuttaṃ': 13, 'dhātusaṃyuttaṃ': 14, 'anamataggasaṃyuttaṃ': 15,
        'kassapasaṃyuttaṃ': 16, 'lābhasakkārasaṃyuttaṃ': 17, 'rāhulasaṃyuttaṃ': 18,
        'lakkhaṇasaṃyuttaṃ': 19, 'opammasaṃyuttaṃ': 20, 'bhikkhusaṃyuttaṃ': 21,
        'khandhasaṃyuttaṃ': 22, 'rādhasaṃyuttaṃ': 23, 'diṭṭhisaṃyuttaṃ': 24,
        'okkantasaṃyuttaṃ': 25, 'uppādasaṃyuttaṃ': 26, 'kilesasaṃyuttaṃ': 27,
        'sāriputtasaṃyuttaṃ': 28, 'nāgasaṃyuttaṃ': 29, 'supaṇṇasaṃyuttaṃ': 30,
        'gandhabbakāyasaṃyuttaṃ': 31, 'valāhakasaṃyuttaṃ': 32, 'vacchagottasaṃyuttaṃ': 33,
        'jhānasaṃyuttaṃ': 34,
        'saḷāyatanasaṃyuttaṃ': 35, 'vedanāsaṃyuttaṃ': 36, 'mātugāmasaṃyuttaṃ': 37,
        'jambukhādakasaṃyuttaṃ': 38, 'sāmaṇḍakasaṃyuttaṃ': 39, 'moggallānasaṃyuttaṃ': 40,
        'cittasaṃyuttaṃ': 41, 'gāmaṇisaṃyuttaṃ': 42,
        # SN 43 is the Asaṅkhatasaṃyutta and SN 44 the Abyākatasaṃyutta; this
        # matches the sutta totals in ``toc`` below (44 and 11 respectively).
        'asaṅkhatasaṃyuttaṃ': 43, 'abyākatasaṃyuttaṃ': 44,
        'maggasaṃyuttaṃ': 45, 'bojjhaṅgasaṃyuttaṃ': 46,
        'satipaṭṭhānasaṃyuttaṃ': 47, 'indriyasaṃyuttaṃ': 48, 'sammappadhānasaṃyuttaṃ': 49,
        'balasaṃyuttaṃ': 50, 'iddhipādasaṃyuttaṃ': 51, 'anuruddhasaṃyuttaṃ': 52,
        # NOTE(review): SN 53 is usually titled "jhānasaṃyuttaṃ", the same as
        # SN 34 above, and a dict key cannot map to both.  Confirm which
        # heading the input really uses for SN 53 before trusting this key.
        'jhānābhinayasaṃyuttaṃ': 53,
        'ānāpānasaṃyuttaṃ': 54, 'sotāpattisaṃyuttaṃ': 55, 'saccasaṃyuttaṃ': 56,
    }
    # Per-saṃyutta list of vagga sizes (number of suttas in each vagga).
    toc = {
        1: [10, 10, 10, 10, 10, 10, 10, 11], 2: [10, 10, 10], 3: [10, 10, 5], 4: [10, 10, 5],
        5: [10], 6: [10, 5], 7: [10, 12], 8: [12], 9: [14], 10: [12], 11: [10, 10, 5],
        12: [10, 10, 10, 10, 10, 11, 10, 12, 7], 13: [11], 14: [10, 10, 10, 9], 15: [10, 10],
        16: [13], 17: [10, 10, 10, 4], 18: [10, 12], 19: [21], 20: [12], 21: [12],
        22: [10, 10, 10, 10, 11, 10, 10, 10, 10, 10, 10, 10, 13, 10, 10], 23: [10, 10, 10, 14],
        24: [10, 10, 10, 10, 10, 10, 10, 28], 25: [10], 26: [10], 27: [10], 28: [10],
        29: [50], 30: [46], 31: [22], 32: [54], 33: [55], 34: [55],
        35: [10, 10, 10, 10, 15, 10, 10, 10, 10, 10, 10, 10, 10, 10, 12, 12, 10, 10, 10, 10, 10, 10, 13],
        36: [10, 10, 11], 37: [34], 38: [16], 39: [16], 40: [11], 41: [10], 42: [13], 43: [44], 44: [11],
        45: [10, 10, 10, 10, 10, 10, 10, 10, 11, 10, 10, 10, 10, 10, 20],
        46: [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 8, 10],
        47: [10, 10, 10, 10, 10, 10, 10, 10, 10, 4],
        48: [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10],
        49: [12], 50: [12], 51: [10, 10, 12], 52: [12, 12], 53: [54], 54: [10, 10],
        55: [10, 10, 10, 10, 10, 13, 11], 56: [10, 10, 10, 10, 11, 9, 10, 10, 10, 10, 31],
    }
    return name_to_num, toc
def process_and_cite_all(input_file, output_file):
    """Append modern "SN x.y" citations to heading lines of a DPR text file.

    The input is read as UTF-8 and scanned once, top to bottom, while
    tracking the current saṃyutta and vagga:

    * a recognised saṃyutta heading whose number is higher than the current
      one starts a new saṃyutta and is tagged `` SN <n>``;
    * a vagga heading with a higher number than the current vagga updates
      the running sutta offset from the table of vagga sizes;
    * a ``...suttavaṇṇanā`` heading is tagged `` SN <n>.<sutta>`` (or a
      ``<start>-<end>`` range), the vagga-local number being shifted by the
      running offset.

    Every line is then written to ``output_file`` (UTF-8); tagged lines have
    their trailing whitespace replaced by the citation and a newline.  A
    completion message is printed at the end.

    Args:
        input_file: Path of the text file to read.
        output_file: Path of the annotated file to write.
    """
    name_lookup, vagga_sizes_by_samyutta = create_authoritative_maps()

    with open(input_file, 'r', encoding='utf-8') as source:
        lines = source.readlines()

    samyutta_regex = re.compile(r'^\s*♦\s*\d*\.?\s*([\wāīūñṭḍṇḷṃĀĪŪÑṬḌṆḶṂ]+saṃyuttaṃ)')
    vagga_regex = re.compile(r'^\s*♦\s*(\d+)\.\s*([\wāīūñṭḍṇḷṃĀĪŪÑṬḌṆḶṂ]+vaggo)')
    vannana_regex = re.compile(r'^\s*♦\s*(\d+)(-(\d+))?\.\s*.*suttavaṇṇanā')

    tags = {}          # line index -> citation suffix to append
    samyutta_no = 0    # 0 means "no saṃyutta heading seen yet"
    vagga_no = 0       # 0 means "no vagga heading seen in this saṃyutta"
    offset = 0         # suttas contained in the vaggas before the current one

    for idx, text in enumerate(lines):
        heading = samyutta_regex.search(text)
        if heading:
            # Unknown names fall back to 0, which can never exceed the
            # current number, so they are ignored.
            candidate = name_lookup.get(heading.group(1).strip(), 0)
            if candidate > samyutta_no:
                samyutta_no = candidate
                vagga_no = 0
                offset = 0
                # The saṃyutta heading itself gets a citation too.
                tags[idx] = f" SN {samyutta_no}"
        elif samyutta_no > 0:
            section = vagga_regex.search(text)
            if section is not None:
                number = int(section.group(1))
                # Only move forward; lower or repeated numbers are ignored.
                if number > vagga_no:
                    vagga_no = number
                    sizes = vagga_sizes_by_samyutta.get(samyutta_no)
                    # The offset is only recomputed when the table has an
                    # entry for this vagga; otherwise it is left unchanged.
                    if sizes is not None and 1 <= number <= len(sizes):
                        offset = sum(sizes[:number - 1])

        commentary = vannana_regex.search(text)
        if not commentary or samyutta_no <= 0:
            continue

        # Sutta headings before any vagga heading belong to an implicit
        # first vagga.
        if vagga_no == 0:
            vagga_no = 1
            offset = 0

        first = offset + int(commentary.group(1))
        upper = commentary.group(3)
        if upper:
            last = offset + int(upper)
            tags[idx] = f" SN {samyutta_no}.{first}-{last}"
        else:
            tags[idx] = f" SN {samyutta_no}.{first}"

    with open(output_file, 'w', encoding='utf-8') as sink:
        for idx, text in enumerate(lines):
            tag = tags.get(idx)
            if tag is None:
                sink.write(text)
            else:
                sink.write(f"{text.rstrip()}{tag}\n")

    print(f"\nProcessing complete with Saṃyutta citations. Output written to '{output_file}'")
if __name__ == "__main__":
    # Script entry point: annotate "notes.txt" and write the tagged copy
    # alongside it.
    process_and_cite_all(
        input_file="notes.txt",
        output_file="notes_with_citations_full.txt",
    )
Comments
Post a Comment