1
0
mirror of https://github.com/mbirth/tcl_update_db.git synced 2024-09-20 01:03:26 +01:00

Cleanup tool to remove duplicate XMLs.

This commit is contained in:
Markus Birth 2017-12-16 22:15:12 +01:00
parent 9f6e0b7136
commit 30b5d08e34
Signed by: mbirth
GPG Key ID: A9928D7A098C3A9A

24
clean_dupes.php Executable file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env php
<?php
/**
 * Cleanup tool: deletes duplicate XML files from the "data/" directory that
 * sits next to this script.
 *
 * Every "*.xml" file is hashed with SHA-1. When a digest has been seen before,
 * the candidate is additionally compared to the previously recorded file via
 * MD5; only if both digests agree is the candidate deleted. The file that was
 * recorded first is kept.
 *
 * Progress is written to STDOUT, problems (unreadable or undeletable files)
 * to STDERR. Files that cannot be hashed are never deleted.
 */
$bkup_dir = __DIR__ . '/data/';

// glob() returns false on error; treat that like "nothing to do" instead of
// feeding a non-array into foreach.
$file_list = glob($bkup_dir . '*.xml');
if ($file_list === false) {
    $file_list = array();
}

// Maps SHA-1 digest => basename of the file most recently recorded for it.
$hashes = array();
foreach ($file_list as $file) {
    $filename = basename($file);

    $file_hash = sha1_file($file);
    if ($file_hash === false) {
        // Without this guard, false would be coerced to the array key '' and
        // all unreadable files would be lumped together as "duplicates".
        fwrite(STDERR, 'Unable to hash file: ' . $filename . PHP_EOL);
        continue;
    }

    if (isset($hashes[$file_hash])) {
        $old_file = $hashes[$file_hash];
        $new_md5 = md5_file($file);
        $old_md5 = md5_file($bkup_dir . $old_file);

        if ($new_md5 === false || $old_md5 === false) {
            // Cannot confirm the duplicate, so err on the side of keeping the file.
            fwrite(STDERR, 'Unable to verify duplicate: ' . $filename . ' (first: ' . $old_file . ')' . PHP_EOL);
            continue;
        }

        // Strict comparison is required: with ==, two different digests that
        // both look like "0e<digits>" compare equal as numeric strings.
        if ($new_md5 === $old_md5) {
            echo 'Duplicate file: ' . $filename . ' (first: ' . $old_file . ')' . PHP_EOL;
            if (!unlink($file)) {
                fwrite(STDERR, 'Unable to delete file: ' . $filename . PHP_EOL);
            }
            continue;
        }

        echo 'Possible SHA1 collision?' . PHP_EOL;
    }

    $hashes[$file_hash] = $filename;
}