Réécriture de la fonction sanitize

Réécriture de la fonction sanitize_name afin d’éviter le bug sur les épisodes contenant FiNAL
2025-05-29 20:20:42 +00:00 · 2025-05-29 20:20:42 +00:00 · 33a1733c9f
commit 33a1733c9f
parent 67cab29bea
1 changed files with 18 additions and 28 deletions
--- a/postprocess.sh
+++ b/postprocess.sh
@ -71,98 +71,88 @@ remove_accents() {
 #   - dernier segment préfixé par un tiret
 # ----------------------------------
 sanitize_name() {
-  # Extraction du nom de base et de l'extension
  local filepath="$1"
  local filename="$(basename "$filepath")"
  local ext="${filename##*.}"
  local name="${filename%.*}"

-  # On veut vérifier que name ne contient PAS l’un des mots suivants
  local upper_name="${name^^}"
-  
-  # Si le nom est entièrement en minuscules
-  # OU ne contient aucun des mots-clés SUBFRENCH|TRUEFRENCH|FRENCH|MULTI|VOSTFR
  if [[ "$name" == "${name,,}" ]] || ! [[ "$upper_name" =~ THEATRICAL|DIRECTOR|EXTENDED|REMASTERED|UNRATED|UNCENSORED|DOC|PART|REPACK|PROPER|INTERNAL|FINAL|SUBFRENCH|TRUEFRENCH|FRENCH|MULTI|VOSTFR ]]; then
    local parent="$(basename "$(dirname "$filepath")")"
    name="$(remove_accents "$parent")"
  fi

-  # Translit accents
  name="$(remove_accents "$name")"
-  # Normalisation des séparateurs
  name="${name//[ _]/.}"

-  # Découpage en segments
  IFS='.' read -ra parts <<< "$name"
  local total=${#parts[@]}
  local keep=()
-  local i j k
  local marker_index=-1
  local pattern

-  # --- 1) d'abord, on regarde si on a un SxxEyy quelque part ---
+  # Locate the marker segment: a SxxEyy episode tag first, otherwise a 19xx/20xx year
  for ((i=0; i<total; i++)); do
    if [[ "${parts[i]}" =~ ^[Ss][0-9]{2}[Ee][0-9]{2}$ ]]; then
      marker_index=$i
-      pattern='^[Ss][0-9]{2}[Ee][0-9]{2}$'
      break
    fi
  done
-
-  # --- 2) si pas de série, on cherche une année (film) ---
  if (( marker_index < 0 )); then
    for ((i=0; i<total; i++)); do
      if [[ "${parts[i]}" =~ ^(19|20)[0-9]{2}$ ]]; then
        marker_index=$i
-        pattern='^(19|20)[0-9]{2}$'
        break
      fi
    done
  fi

-  # --- 3) si on a trouvé un marqueur, on construit keep[] ---
  if (( marker_index >= 0 )); then
-    # on garde tout jusqu'au marqueur inclus
    keep=("${parts[@]:0:marker_index+1}")
-    # on cherche ensuite le premier tag qualité/langue
+
+    # First edition/release tag (FINAL, PROPER, INTERNAL, etc.): keep only that one segment
    for ((j=marker_index+1; j<total; j++)); do
      tag="${parts[j]^^}"
-      if [[ "$tag" =~ ^(THEATRICAL|DIRECTOR|EXTENDED|REMASTERED|UNRATED|UNCENSORED|DOC|PART|REPACK|PROPER|INTERNAL|FINAL|SUBFRENCH|TRUEFRENCH|FRENCH|MULTI|VOSTFR)$ ]] \
-         || [[ "$tag" =~ ^[0-9]{3,4}P$ ]]; then
-        # on ajoute tout à partir de ce tag
+      if [[ "$tag" =~ ^(THEATRICAL|DIRECTOR|EXTENDED|REMASTERED|UNRATED|UNCENSORED|DOC|PART|REPACK|PROPER|INTERNAL|FINAL)$ ]]; then
+        keep+=("${parts[j]}")
+        # then keep scanning for a language/resolution tag and keep everything from it onward
+        for ((k=j+1; k<total; k++)); do
+          t2="${parts[k]^^}"
+          if [[ "$t2" =~ ^(SUBFRENCH|TRUEFRENCH|FRENCH|MULTI|VOSTFR)$ ]] || [[ "$t2" =~ ^[0-9]{3,4}P$ ]]; then
+            keep+=("${parts[@]:k}")
+            break
+          fi
+        done
+        break
+      fi
+      # Otherwise, if this segment is itself a language/resolution tag, keep everything from it onward
+      if [[ "$tag" =~ ^(SUBFRENCH|TRUEFRENCH|FRENCH|MULTI|VOSTFR)$ ]] || [[ "$tag" =~ ^[0-9]{3,4}P$ ]]; then
        keep+=("${parts[@]:j}")
        break
      fi
    done
  else
-    # si pas de marqueur du tout, on garde tout
    keep=("${parts[@]}")
  fi

-  # reconstruire la base nettoyée
  local out
  out=$(IFS='.'; echo "${keep[*]}")
-
-  # nettoyage TOS, espaces, caractères interdits
  out=$(echo "$out" | sed -e 's/(TOS)//Ig' -e 's/[ _]/./g' -e 's/[^A-Za-z0-9.-]//g')
-  # apostrophes + majuscule suivante avec perl
  out=$(echo "$out" | perl -pe 's/'"'"'([A-Za-z])/.\U\1/g')
-  # forcer le "i" minuscule dans MULTI, FINAL et INTERNAL
  out=${out//.MULTI./.MULTi.}
  out=${out//.FINAL./.FiNAL.}
  out=${out//.INTERNAL./.iNTERNAL.}

-  # Préfixe du dernier segment par un tiret uniquement si aucune autre tiret interne
  local last="${out##*.}"
  if [[ "$last" != *-* ]]; then
    out="${out%.*}-$last"
  fi

-  # Ajout de l'extension
  printf "%s.%s" "$out" "$ext"
 }


+
 # ----------------------------------
 # Traitement des .mkv uniquement
 # ----------------------------------