2017-12-16 22:15:12 +01:00
|
|
|
#!/usr/bin/env php
|
|
|
|
<?php
|
|
|
|
|
2017-12-18 15:02:24 +01:00
|
|
|
/*
 * Removes duplicate XML files from the data directory next to this script.
 *
 * Every *.xml file in $bkup_dir is hashed with SHA-1. When a file's hash has
 * already been seen, the match is confirmed with an MD5 comparison against the
 * earlier file, and the later file is deleted. Progress is printed on a single
 * terminal line that is rewritten using ANSI escape sequences.
 */

$bkup_dir = __DIR__ . '/../data/';

// ANSI "Control Sequence Introducer" (ESC + '[').
$ansi_csi = chr(27) . '[';
// Move the cursor to the start of the previous line and erase that line, so
// the next echo overwrites the previous progress output.
$ansi_updel = $ansi_csi . 'F' . $ansi_csi . 'K';

echo 'Reading file list ...';

// glob() returns false on error (and, on some systems, when nothing matches);
// normalise that to an empty list so count()/foreach stay well-defined.
$file_list = glob($bkup_dir . '*.xml') ?: [];
$num_total = count($file_list);
$num_deleted = 0;

echo 'found ' . $num_total . ' files.' . PHP_EOL;
echo 'Searching for duplicates ...' . PHP_EOL . PHP_EOL;

// Maps SHA-1 digest => basename of the most recently kept file with that digest.
$hashes = [];

foreach ($file_list as $i => $file) {
    echo $ansi_updel . ($i + 1) . '/' . $num_total . PHP_EOL;
    flush();

    $filename = basename($file);
    $file_hash = sha1_file($file);

    if ($file_hash === false) {
        // Without a digest the file cannot be compared; never treat it as a
        // duplicate and never register it under a bogus key.
        echo $ansi_updel . 'Cannot read file: ' . $filename . PHP_EOL . PHP_EOL;
        continue;
    }

    if (isset($hashes[$file_hash])) {
        $old_file = $hashes[$file_hash];

        $new_md5 = md5_file($file);
        $old_md5 = md5_file($bkup_dir . $old_file);

        // Strict comparison: with ==, two different digests that both look
        // like "0e<digits>" compare equal as numbers, and false == false would
        // "match" when neither file could be read.
        if ($new_md5 !== false && $new_md5 === $old_md5) {
            echo $ansi_updel . 'Duplicate file: ' . $filename . ' (first: ' . $old_file . ')' . PHP_EOL . PHP_EOL;

            // Only count the file as deleted when the deletion really happened.
            if (unlink($file)) {
                $num_deleted++;
            } else {
                echo $ansi_updel . 'Could not delete: ' . $filename . PHP_EOL . PHP_EOL;
            }

            continue;
        }

        // Same SHA-1 but the MD5 check did not confirm the match (or one of
        // the files could not be read for the second hash).
        echo $ansi_updel . 'Possible SHA1 collision?' . PHP_EOL . PHP_EOL;
    }

    $hashes[$file_hash] = $filename;
}

echo $num_total . ' files processed. ' . $num_deleted . ' deleted. ' . ($num_total - $num_deleted) . ' left.' . PHP_EOL;
|
2018-01-05 15:14:47 +01:00
|
|
|
|