
    di                       U d dl mZ d dlZd dlmZ d dlmZ ddlmZm	Z	m
Z
mZ ddlmZmZmZmZmZ ddlmZ dd	lmZmZ dd
lmZmZmZmZmZmZ  ej        d          Z ej                     Z!e!"                     ej#        d                     g Z$de%d<   g Z&de%d<   eD ]YZ'	  ee'          re$(                    e'           ne&(                    e'           :# e)$ r e&(                    e'           Y Vw xY we$e&z   Z*de%d<   	 	 	 	 	 	 	 	 	 d2d3d(Z+	 	 	 	 	 	 	 	 	 d2d4d+Z,	 	 	 	 	 	 	 	 	 d2d5d.Z-	 	 	 	 	 	 	 	 	 d6d7d1Z.dS )8    )annotationsN)PathLike)BinaryIO   )coherence_ratioencoding_languagesmb_encoding_languagesmerge_coherence_ratios)IANA_SUPPORTEDIANA_SUPPORTED_SIMILARTOO_BIG_SEQUENCETOO_SMALL_SEQUENCETRACE)
mess_ratio)CharsetMatchCharsetMatches)any_specified_encodingcut_sequence_chunks	iana_nameidentify_sig_or_bomis_multi_byte_encodingshould_strip_sig_or_bomcharset_normalizerz)%(asctime)s | %(levelname)s | %(message)sz	list[str]_mb_supported_sb_supportedIANA_SUPPORTED_MB_FIRST      皙?TF皙?	sequencesbytes | bytearraystepsint
chunk_size	thresholdfloatcp_isolationlist[str] | Nonecp_exclusionpreemptive_behaviourboolexplainlanguage_thresholdenable_fallbackreturnr   c
                R$   t          | t          t          f          s/t          d                    t          |                               |rJt          j        }
t                              t                     t          
                    t                     t          |           }|dk    rwt                              d           |r9t                              t                     t          
                    |
           t          t!          | dddg d          g          S |At                              t          d	d
                    |                     d |D             }ng }|At                              t          dd
                    |                     d |D             }ng }|||z  k    r't                              t          d|||           d}|}|dk    r||z  |k     rt'          ||z            }t          |           t(          k     }t          |           t*          k    }|r4t                              t          d                    |                     n5|r3t                              t          d                    |                     g }|rt-          |           nd}|6|                    |           t                              t          d|           t1                      }g }g }t1                      }t1                      }i }d}t1                      }d}d}d}d}d}d}t                      }t                      }t3          |           \  } }!| D|                    |            t                              t          dt          |!          |            |                    d           d|vr|                    d           |t4          z   D ]}"|r|"|vr
|r|"|v r|"|v r|                    |"           d}#| |"k    }$|$ot9          |"          }%|"dv r$|$s"t                              t          d|"           l|"dv r$|$s"t                              t          d|"           |"|v r"t                              t          d|"           |"|v r"t                              t          d|"           	 t;          |"          }&n9# t<          t>          f$ r% t                              t          d|"           Y &w xY w|ru|&st1          tA          |"                    }'nt1          tC          |"                    }'|'"                    |          s%t                              t          d|"|'|           |r-|&s+||k    r%t                              t          d|"||           |r%|&s#t                              t          d|"           	 |rS|&du rOtG          |%du r| dt'          d                    n#| t          |!          t'          d                    |"!           n^|"d"k    r,|$r*tG          | |"!          }#|#r|#d         d#k    r
|#dd         }#n,tG          |%du r| n| t          |!          d         |"!          }#nx# tH          tJ          f$ rd}(t          |(tJ                    s/t                              t          d$|"tG          |(                     |                    |"           Y d}(~(d}(~(ww xY wtM          |$sdnt          |!          |t'          ||z                      })|&o|#duot          |#          |k     }*|*r!t                              t          d%|"           |#V|&sStO          |#          }+|(                    |+          },|,,|,\  }-}.}/|/rt!          | |"|-|$|.|du s|"|ddfv r|#nd|&          }0|                    |0           |                    |"           t                              t          d'|"tS          |-d(z  d)*                     |"|ddfv r|-d+k     r|-dk    rmt                              d,|0j*                   |r9t                              t                     t          
                    |
           t          |0g          c S |                    |0           t          |          r|||v rd|v rd|v r|+                                }1t                              d,|1j*                   |r9t                              t                     t          
                    |
           t          |1g          c S ~|                    |"           t                              t          d-|"           |	r3|"dd|d.d/fv r*t!          | |"||$g |#|&          }2|"|k    r|2}n|"dk    r|2}n|2}t'          t          |)          d0z            }3tY          |3d1          }3d}4d}5g }6g }7	 t[          | |"|)||$|%|!|&|#	  	        D ]y}8|6                    |8           |7                    t]          |8||d2u odt          |          cxk    od1k    nc                      |7d3         |k    r|4dz  }4|4|3k    s|$r|%du r nznJ# tH          $ r=}(t                              t          d4|"tG          |(                     |3}4d2}5Y d}(~(nd}(~(ww xY w|5s|r|&s	 | t'          d5          d         /                    |"d67           n\# tH          $ rO}(t                              t          d8|"tG          |(                     |                    |"           Y d}(~(d}(~(ww xY w|7rta          |7          t          |7          z  nd}9|9|k    s|4|3k    r|                    |"           |"tb          v r |2                    tb          |"                    |#(|&s&|3                    tO          |#          |9g df           t                              t          d9|"|4tS          |9d(z  d)*                     |	r5|"dd|d.d/fv r,|5s*t!          | |"||$g |#|&          }2|"|k    r|2}n|"dk    r|2}n|2}t                              t          d:|"tS          |9d(z  d)*                     |&stA          |"          }:ntC          |"          }:|:rAt                              t          d;                    |"tG          |:                               g };|"dk    rR|6D ]?}8ti          |8||:rd<                    |:          nd          }<|;                    |<           @tk          |;          }=ntk          |;          }=|=r4t                              t          d=                    |=|"                     t!          | |"|9|$|=|du s|"|ddfv r|#nd|&          }>|                    |>           |#(|&s&|3                    tO          |#          |9|=d2f           |r|&s|9d>k     r|dz  }|"|ddfv r|9d+k     r|9dk    rmt                              d,|>j*                   |r9t                              t                     t          
                    |
           t          |>g          c S |                    |>           t          |          r|||v rd|v rd|v r|+                                }1t                              d,|1j*                   |r9t                              t                     t          
                    |
           t          |1g          c S |si|&sg|=rtY          d? |=D             d@          nd}?|?dAk    rBd|v r>d|v r:d2}|2                    |:           t                              t          dB|"|9|?           |so|&rm|*rk|#it          |#          |dCz  k     rS|"dDvrOd|v rKd|v rGd2}t                              t          dE|"|9t          |#          |t          |#          |z  d(z             |"| k    rnt                              dF|"           |r9t                              t                     t          
                    |
           t          ||"         g          c S t          |          dk    r|s|s|r t                              t          dG           |r6t                              dH|j*                   |                    |           n{|r||r|r|j6        |j6        k    s|0t                              dI           |                    |           n1|r/t                              dJ           |                    |           |rDt                              dK|+                                j*        t          |          dz
             nt                              dL           |r9t                              t                     t          
                    |
           |S )Maf  
    Given a raw bytes sequence, return the best possibles charset usable to render str objects.
    If there is no results, it is a strong indicator that the source is binary/not text.
    By default, the process will extract 5 blocks of 512o each to assess the mess and coherence of a given sequence.
    And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will.

    The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritize a particular code page
    but never take it for granted. Can improve the performance.

    You may want to focus your attention to some code page or/and not others, use cp_isolation and cp_exclusion for that
    purpose.

    This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32.
    By default the library does not setup any handler other than the NullHandler, if you choose to set the 'explain'
    toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging.
    Custom logging format and handler can be set manually.
    z3Expected object of type bytes or bytearray, got: {}r   z<Encoding detection on empty bytes, assuming utf_8 intention.utf_8g        F Nz`cp_isolation is set. use this flag for debugging purpose. limited list of encoding allowed : %s.z, c                .    g | ]}t          |d           S Fr   .0cps     T/var/www/html/volatility/venv/lib/python3.11/site-packages/charset_normalizer/api.py
<listcomp>zfrom_bytes.<locals>.<listcomp>s   "    DDD	"e,,DDD    zacp_exclusion is set. use this flag for debugging purpose. limited list of encoding excluded : %s.c                .    g | ]}t          |d           S r5   r6   r7   s     r:   r;   zfrom_bytes.<locals>.<listcomp>~   r<   r=   z^override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.r   z>Trying to detect encoding from a tiny portion of ({}) byte(s).zIUsing lazy str decoding because the payload is quite large, ({}) byte(s).z@Detected declarative mark in sequence. Priority +1 given for %s.   zIDetected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.ascii>   utf_16utf_32z\Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.>   utf_7zREncoding %s won't be tested as-is because detection is unreliable without BOM/SIG.zY%s is deemed too similar to a code page that was already considered unsuited. Continuing!zESkipping %s: already fast-tracked from a similar successful encoding.z2Encoding %s does not provide an IncrementalDecoderzbSkipping %s: definitive match already found, this encoding targets different languages (%s vs %s).zXSkipping %s: already accumulated %d same-family results after definitive match (cap=%d).zCSkipping single-byte %s: multi-byte definitive match already found.g    A)encodingrC   u   ﻿z9Code page %s does not fit given bytes sequence at ALL. %szpCode page %s is a multi byte encoding table and it appear that at least one character was encoded using n-bytes.)preemptive_declarationzM%s fast-tracked (identical decoded payload to a prior encoding, chaos=%f %%).d      )ndigitsr    z.Encoding detection: %s is most likely the one.zZ%s fast-skipped (identical decoded payload to a prior encoding that failed chaos probing).rA   rB         TzaLazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %sg     j@strict)errorsz^LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %szc%s was excluded because of initial chaos probing. Gave up %i time(s). Computed mean chaos is %f %%.z=%s passed initial chaos probing. Mean measured chaos is %f %%z&{} should target any language(s) of {},z We detected language {} using {}g{Gz?c              3      K   | ]	\  }}|V  
d S )N )r8   _vs      r:   	<genexpr>zfrom_bytes.<locals>.<genexpr>  s&      4441aQ444444r=   )defaultg      ?zyDefinitive match found: %s (chaos=%.3f, coherence=%.2f). Encodings targeting different language families will be skipped.g\(\?>	   rC   r2   rA   rB   	utf_16_be	utf_16_le	utf_32_be	utf_32_le	utf_8_sigzjMulti-byte definitive match: %s (chaos=%.3f, decoded=%d/%d=%.1f%%). Single-byte encodings will be skipped.zoEncoding detection: %s is most likely the one as we detected a BOM or SIG within the beginning of the sequence.zONothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.z7Encoding detection: %s will be used as a fallback matchz:Encoding detection: utf_8 will be used as a fallback matchz:Encoding detection: ascii will be used as a fallback matchz]Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.z=Encoding detection: Unable to determine any suitable charset.)7
isinstance	bytearraybytes	TypeErrorformattypeloggerlevel
addHandlerexplain_handlersetLevelr   lendebugremoveHandlerr   r   logjoinr$   r   r   r   appendsetr   r   addr   r   ModuleNotFoundErrorImportErrorr   r	   intersectionstrUnicodeDecodeErrorLookupErrorrangehashgetroundrD   bestmaxr   r   decodesumr   update
setdefaultr   r
   fingerprint)@r!   r#   r%   r&   r(   r*   r+   r-   r.   r/   previous_logger_levellengthis_too_small_sequenceis_too_large_sequenceprioritized_encodingsspecified_encodingtestedtested_but_hard_failuretested_but_soft_failuresoft_failure_skipsuccess_fast_trackedpayload_result_cachedefinitive_match_founddefinitive_target_languages post_definitive_sb_success_countPOST_DEFINITIVE_SB_CAPmb_definitive_match_foundfallback_asciifallback_u8fallback_specifiedresultsearly_stop_resultssig_encodingsig_payloadencoding_ianadecoded_payloadbom_or_sig_availablestrip_sig_or_bomis_multi_byte_decoderenc_languageser_multi_byte_bonuspayload_hashcachedcached_mess	cached_cdcached_passed
fast_matchprobable_resultfallback_entrymax_chunk_gave_upearly_stop_countlazy_str_hard_failure	md_chunks	md_ratioschunkmean_mess_ratiotarget_languages	cd_ratioschunk_languagescd_ratios_mergedcurrent_matchbest_coherences@                                                                   r:   
from_bytesr   9   s   < i)U!344 
AHHY 
 
 	
  %+\/***i..F{{STTT 	3  111OO1222|IwUBPRSSTUUU

5IIl##		
 	
 	
 ED|DDD

6IIl##		
 	
 	
 ED|DDD*u$%%

l	
 	
 	
 
qyyVe^j00%((
"%i..3E"E"%i..4D"D 


LSS 	
 	
 	
 	
 
 


W^^ 	
 	
 	
 (* .BKy)))t  %$$%7888

N	
 	
 	
 uuF)+)+"%%%%(UU
 TV $),/EE -.$"# ',*.N'+K.2,..G)7)9)9 3I > >L+$$\222

W		
 	
 	
   )))+++$$W---.1HH D	< D	< 	M== 	M\99F""

=!!!&*%1]%B!5 "
:Q;
 ;
 0009M0JJn  
 I%%.B%JJd  
  ---JJk  
  000JJW  
 	*@*O*O!!#[1 	 	 	JJD  
 H	 " 	( J #$6}$E$E F F #$9-$H$H I I --.IJJ 

x!!/    #	)	 14JJJJJj0&   
 % 	-B 	JJU  
 *	$ )>%)G)G ,u44 "+CII+..&s;'7'7#d))'CD*     !G++0D+&)!!.' ' 'O ' >?1+=+I+I*9!""*=&)  0588 &I!*3{+;+;+=+=!>!.' ' 'O #K0 		 		 		a-- 

O!FF	   $**=999HHHH		 )?AAs;/?/?
 
 " .t+.O$$v- 	  	JJ-	   &/D& $_ 5 5L)--l;;F!8>5Y  [!-!%#,! !6 > >#0$6#I$J $J ,O "&/A" " "J" NN:...(,,];;;JJg%kC/;;;	   &*<gw)OOO'#--&#--"LL P * 3    ' G & 4 4_ E E E &0E F F F#1:,#?#????*11*=== .//A/7;MQW;W;W#v--#v--8J8O8O8Q8QL+4   # C"00AAA"OO,ABBB-.?@@@@@ ,22=AAAJJt%   ' 9=*  = , , *6%)%0+3E* * * ),>>>1?..*g55-;NN*8K!$SWWq[!1!1 1155 ! %!		'	),$ %
 
     '''  !4GA\1B1B,G,G,G,Ga,G,G,G,G    R=I--$)$$(999( :-=-F-FE
	) 
	) 
	) JJsA	    1$(!!!!!!
	) &	%	 *	

#d))++&--mH-MMMM%   

t!FF	   (..}=== EN!VY#i..!@!@SVi''+;?P+P+P#**=999 666!(()?)NOOO *3H*$//))OR+G   JJ0 o+Q777    1!W&8(HMN N-N ".!(#+=" " " !$666)7&&"g--%3NN"0K

K/C'333		
 	
 	
 % 	D*<]*K*K4]CC 	JJ8??!3'7#8#8    	 G## # 2 2"1&2BLCHH-...# #   11115i@@5i@@ 	JJ299$m    %  *U22$);Wg(NNN  
 #5
 
 
" 	}%%% &/D& ++_%% "2D9   #	2)	2  $&&,1, 0'7CCC#%% #%%D!*    ;((999OO$9:::%}o66666%%m444 "##	5#+/AV/K/K6!!6!!05577OLL@(    7$$_555 5666!?"344444 & 	.C 	 $44#3444cBBBB 
 $$F):):w&?P?P)-&+223CDDD

 P!#"   *	%	 !	  +O$$v}44
  6!!6!!(,%JJ|O$$O$$v-3   L((LL1  
  7$$_555 5666!7=#9":;;;;; ) 7||q 	. 	,> 	JJa  
  	+LLI"+   NN-....	++3 4 # 4  +~/III'LLUVVVNN;'''' 	+LLUVVVNN>*** VkLLNN#LL1	
 	
 	
 	
 	TUUU /_----...Ns^    S2TTB3Z\ A[;;\ Bj
k3kk%,l
m+Am&&m+fpr   c
                V    t          |                                 |||||||||	
  
        S )z
    Same thing than the function from_bytes but using a file pointer that is already ready.
    Will not close the file pointer.
    )r   read)
r   r#   r%   r&   r(   r*   r+   r-   r.   r/   s
             r:   from_fpr   `  s<      
		  r=   pathstr | bytes | PathLikec
                    t          | d          5 }
t          |
|||||||||	
  
        cddd           S # 1 swxY w Y   dS )z
    Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.
    Can raise IOError.
    rbN)openr   )r   r#   r%   r&   r(   r*   r+   r-   r.   r/   r   s              r:   	from_pathr   ~  s      
dD		 
R 
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s   6::fp_or_path_or_payload!PathLike | str | BinaryIO | bytesc
                   t          | t          t          f          rt          | |||||||||	
  
        }
nOt          | t          t
          f          rt          | |||||||||	
  
        }
nt          | |||||||||	
  
        }
|
 S )a)  
    Detect if the given input (file, bytes, or path) points to a binary file. aka. not a string.
    Based on the same main heuristic algorithms and default kwargs at the sole exception that fallbacks match
    are disabled to be stricter around ASCII-compatible but unlikely to be a string.
    )	r#   r%   r&   r(   r*   r+   r-   r.   r/   )rZ   rp   r   r   r\   r[   r   r   )r   r#   r%   r&   r(   r*   r+   r-   r.   r/   guessess              r:   	is_binaryr     s    " '#x99 ,
!!%%!51+
 
 
 
	

 
 
 !!%%!51+
 
 
 !!%%!51+
 
 
 ;r=   )	r   r   r   NNTFr    T)r!   r"   r#   r$   r%   r$   r&   r'   r(   r)   r*   r)   r+   r,   r-   r,   r.   r'   r/   r,   r0   r   )r   r   r#   r$   r%   r$   r&   r'   r(   r)   r*   r)   r+   r,   r-   r,   r.   r'   r/   r,   r0   r   )r   r   r#   r$   r%   r$   r&   r'   r(   r)   r*   r)   r+   r,   r-   r,   r.   r'   r/   r,   r0   r   )	r   r   r   NNTFr    F)r   r   r#   r$   r%   r$   r&   r'   r(   r)   r*   r)   r+   r,   r-   r,   r.   r'   r/   r,   r0   r,   )/
__future__r   loggingosr   typingr   cdr   r   r	   r
   constantr   r   r   r   r   mdr   modelsr   r   utilsr   r   r   r   r   r   	getLoggerr`   StreamHandlerrc   setFormatter	Formatterr   __annotations__r   _supported_encrj   rn   r   r   r   r   r   rP   r=   r:   <module>r      sB   " " " " " " "                                            0 0 0 0 0 0 0 0                
	/	0	0'''))   GABB           $ - -N-!!.11 	1  0000  000 - - -^,,,,,- &3]%B  B B B B
 %)%)!% # d d d d dR %)%)!% #     @ %)%)!% # 
 
 
 
 
B %)%)!% #!? ? ? ? ? ? ?s   %6CC98C9