129 lines
4.3 KiB
Python
129 lines
4.3 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
"""
|
|||
|
|
Script de diagnostic pour analyser un nom de fichier problématique.
|
|||
|
|
|
|||
|
|
Usage:
|
|||
|
|
python3 diagnose_filename.py "04-Tout s'arrange à la fin.flac"
|
|||
|
|
|
|||
|
|
Ou pour analyser tous les fichiers d'un dossier :
|
|||
|
|
python3 diagnose_filename.py /chemin/vers/dossier
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import os
|
|||
|
|
import sys
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _mojibake_of(correct):
    """Return *correct* as it looks when its UTF-8 bytes are read as ISO-8859-1."""
    return correct.encode('utf-8').decode('iso-8859-1')


def _build_mojibake_patterns():
    """Build the table mapping garbled sequences to the character they stand for.

    The keys are derived from the correct characters instead of being typed
    by hand, so the table cannot silently degrade into identity mappings if
    the source file itself gets re-encoded.
    """
    patterns = {}
    for correct in 'éèêë':
        patterns[_mojibake_of(correct)] = correct

    # 'à' is UTF-8 C3 A0: the second byte is a no-break space in ISO-8859-1,
    # which is frequently normalised to a plain space along the way.
    patterns['\u00c3 '] = 'à'
    patterns['\u00c3\u00a0'] = 'à (avec espace insécable)'

    for correct in 'âäçôöùûüîï':
        patterns[_mojibake_of(correct)] = correct

    # 'œ' is UTF-8 C5 93: byte 0x93 is a C1 control in ISO-8859-1 and a left
    # curly quote in Windows-1252, itself often flattened to a straight quote.
    patterns['\u00c5"'] = 'œ'
    patterns['\u00c5\u201c'] = 'œ'
    patterns['\u00c5\u0093'] = 'œ'
    return patterns


_MOJIBAKE_PATTERNS = _build_mojibake_patterns()


def _displayable(text):
    """Return *text* with lone surrogates escaped so it can always be printed."""
    return text.encode('utf-8', 'backslashreplace').decode('utf-8')


def analyze_filename(filename):
    """Print a detailed encoding diagnostic for a file name.

    The report contains, in order: the name and its length, its ``repr``,
    its UTF-8 bytes in hexadecimal, every non-ASCII character with its
    position and code point, the result of re-interpreting the name as
    UTF-8 bytes that were wrongly decoded as ISO-8859-1, and the known
    mojibake sequences found in the name.

    Args:
        filename: The file name to analyse (a ``str``; it may contain lone
            surrogates when it comes from an undecodable on-disk name).

    Returns:
        None. Everything is written to standard output.
    """
    separator = '=' * 70
    print(f"\n{separator}")
    print("ANALYSE DU FICHIER")
    print(separator)
    # Escape lone surrogates: printing them raw raises UnicodeEncodeError on
    # a strict UTF-8 stdout and would abort the whole report.
    print(f"\n📄 Nom affiché : {_displayable(filename)}")
    print(f"📊 Longueur : {len(filename)} caractères")
    print("\n🔍 Représentation Python :")
    print(f" {filename!r}")

    # Byte-by-byte dump
    print("\n🔢 Octets (hex) :")
    try:
        encoded = filename.encode('utf-8')
    except UnicodeEncodeError:
        # Only lone surrogates make a str impossible to encode as UTF-8.
        print(" ❌ Impossible d'encoder en UTF-8")
    else:
        hex_str = ' '.join(f'{b:02x}' for b in encoded)
        print(f" {hex_str}")

    # Suspicious characters: anything outside ASCII (this includes the
    # no-break space U+00A0).
    print("\n🔎 Caractères suspects :")
    found_issues = False
    for i, char in enumerate(filename):
        code = ord(char)
        if code > 127:
            print(f" Position {i:2d}: '{_displayable(char)}' (U+{code:04X} / {code})")
            found_issues = True

    if not found_issues:
        print(" ✅ Aucun caractère suspect détecté")

    # Try to undo a UTF-8 -> ISO-8859-1 misinterpretation
    print("\n🔄 Test de correction :")
    try:
        fixed = filename.encode('iso-8859-1').decode('utf-8')
    except UnicodeDecodeError as e:
        print(f" ❌ Erreur de décodage UTF-8 : {e}")
        print(" 💡 Le fichier contient un mélange d'encodages")
    except UnicodeEncodeError as e:
        print(f" ❌ Erreur d'encodage ISO-8859-1 : {e}")
        print(" 💡 Le fichier contient des caractères non compatibles ISO-8859-1")
    else:
        if fixed == filename:
            print(" ℹ️ La conversion ne change rien")
        else:
            print(" ✅ Conversion réussie !")
            print(f" 📄 Nom corrigé : {fixed}")
            print(f" 🔍 Représentation : {fixed!r}")

    # Known mojibake sequences
    print("\n📋 Patterns d'encodage détectés :")
    found_patterns = [
        (pattern, correct)
        for pattern, correct in _MOJIBAKE_PATTERNS.items()
        if pattern in filename
    ]

    if found_patterns:
        for pattern, correct in found_patterns:
            print(f" • '{pattern}' devrait être '{correct}'")
    else:
        print(" ℹ️ Aucun pattern connu détecté")

    print(f"\n{separator}\n")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def main():
    """Command-line entry point.

    Reads a single argument from ``sys.argv``. When it names an existing
    directory, the tree is walked and every file whose name contains a
    typical mojibake lead character is analysed; otherwise the argument
    itself is analysed as a file name. Without an argument, the usage text
    is printed and the process exits with status 1.
    """
    if len(sys.argv) < 2:
        for usage_line in (
            "Usage:",
            ' python3 diagnose_filename.py "nom-du-fichier.ext"',
            " python3 diagnose_filename.py /chemin/vers/dossier",
        ):
            print(usage_line)
        sys.exit(1)

    target = sys.argv[1]

    # Anything that is not a directory is treated as a bare file name.
    if not os.path.isdir(target):
        analyze_filename(target)
        return

    banner = '#' * 70
    print(f"\n{banner}")
    print(f"ANALYSE DU DOSSIER : {target}")
    print(banner)

    # Lead characters produced when UTF-8 bytes are decoded as Latin-1.
    markers = ('Ã', 'Å')
    flagged = []
    for folder, _subdirs, names in os.walk(target):
        flagged.extend(
            (folder, name)
            for name in names
            if any(marker in name for marker in markers)
        )

    if not flagged:
        print("\n✅ Aucun fichier avec problème d'encodage détecté!")
        return

    print(f"\n⚠️ {len(flagged)} fichier(s) avec problème d'encodage détecté(s):\n")
    for folder, name in flagged:
        location = os.path.relpath(folder, target)
        if location == '.':
            location = '(racine)'
        print(f"\n📁 {location}")
        analyze_filename(name)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Run the diagnostic only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|