How to remove duplicate entries from .bib files
I have multiple chapters that need to be merged into one thesis, but their reference.bib files partly overlap. I need a way to remove the duplicate entries when I merge the .bib files.
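The merging step itself is straightforward, because .bib files are plain text and can simply be concatenated. A minimal sketch (the per-chapter file names here are placeholders, not my actual files):

from pathlib import Path

# Hypothetical per-chapter bibliography files; replace with your own names.
chapter_bibs = ['chapter1.bib', 'chapter2.bib', 'chapter3.bib']

# Concatenate them into a single reference.bib for the thesis.
merged = '\n'.join(Path(p).read_text(encoding='utf-8') for p in chapter_bibs)
Path('reference.bib').write_text(merged + '\n', encoding='utf-8')

After that, the real problem is removing the duplicated entries.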
For the deduplication I searched on Google but found no good results, so I wrote a script myself (with the help of GPT, of course). The full script is as follows.
def read_bib_file(file_path):
    """Read a .bib file and split its content into individual entries."""
    with open(file_path, 'r', encoding='utf-8') as file:
        # Entries begin with '@', so splitting on '\n@' separates them.
        # lstrip('@') drops the leading '@' that only the very first entry
        # keeps, so every item is stored without it and '@' is re-added on write.
        content = [item.lstrip('@') for item in file.read().split('\n@')]
    return content

def remove_duplicates(bib_items):
    """Keep only the first occurrence of each citation key."""
    unique_items = {}
    for item in bib_items:
        if '{' not in item:
            continue  # skip stray text that is not a bib entry
        # The citation key sits between the first '{' and the first ','.
        label = item.split('{', 1)[1].split(',', 1)[0].strip()
        if label not in unique_items:
            unique_items[label] = item
    return list(unique_items.values())

def write_bib_file(file_path, bib_items):
    """Write the deduplicated entries to a .bib file."""
    with open(file_path, 'w', encoding='utf-8') as file:
        for item in bib_items:
            file.write('@' + item + '\n')

# Main function
def main():
    input_path = '/home/jjia/data/lung_function/lung_function/scripts/reference.bib'  # replace with your .bib file path
    output_path = '/home/jjia/data/lung_function/lung_function/scripts/reference_clean.bib'  # replace with the output file path
    bib_items = read_bib_file(input_path)
    unique_items = remove_duplicates(bib_items)
    write_bib_file(output_path, unique_items)
    print(f"Done. The deduplicated file has been saved to {output_path}")

if __name__ == "__main__":
    main()
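If you would rather not rely on hand-rolled string splitting, the third-party bibtexparser package (installed with pip install bibtexparser) parses entries properly. This is a sketch of the same deduplication assuming its v1.x API, not part of my script:

import bibtexparser

with open('reference.bib', encoding='utf-8') as f:
    db = bibtexparser.load(f)

# Each parsed entry is a dict whose citation key is stored under 'ID';
# keep only the first entry seen for each key.
seen = set()
unique = []
for entry in db.entries:
    if entry['ID'] not in seen:
        seen.add(entry['ID'])
        unique.append(entry)
db.entries = unique

with open('reference_clean.bib', 'w', encoding='utf-8') as f:
    bibtexparser.dump(db, f)

The string-splitting script has the advantage of leaving the surviving entries byte-for-byte untouched, whereas a parser may reformat them on output.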