
    iC              
          U d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZ d dlmZmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZmZmZm Z   ejB                  e"      Z#ejH                  e%d<    e
d       G d d             Z&de'e(   dee&   fdZ)de'e(   dee&   fdZ*d,de(de(de+e'e(   ee&   f   fdZ,de(fdZ-dddddd d!d"d#Z. G d$ d%e      Z/ G d& d'e      Z0 G d( d)      Z1d*e2ddfd+Z3y)-    N)Iterable)ThreadPoolExecutor)	dataclass)BaseHTTPRequestHandlerThreadingHTTPServer)parse_qsurlparse)
DictLoaderEnvironment)tabulate)get_world_sizetcpstore_client)build_db)	JobConfig)
CollectiveGroup
MembershipNCCLCallloggerT)slotsc                   .    e Zd ZU eed<   eed<   d Zd Zy)Responsestatus_codetextc                 l    | j                   dk7  r%t        d| j                    d| j                         y )N   zHTTP z: )r   RuntimeErrorr   selfs    [/var/www/html/engine/venv/lib/python3.12/site-packages/torch/distributed/debug/_frontend.pyraise_for_statuszResponse.raise_for_status"   s8    s"t'7'7&8499+FGG #    c                 @    t        j                  | j                        S N)jsonloadsr   r   s    r    r%   zResponse.json&   s    zz$))$$r"   N)__name__
__module____qualname__int__annotations__strr!   r%    r"   r    r   r      s    
IH%r"   r   urlsreturnc                     dd l d}dt        dt        ffd}t        |      5 }|j	                  ||       }d d d        |S # 1 sw Y   S xY w)Nr      urlr/   c                 f    j                  |       }t        |j                  |j                        S r$   )postr   r   r   )r2   resprequestss     r    getzfetch_thread_pool.<locals>.get0   s(    }}S!(($))44r"   )max_workers)r6   r,   r   r   map)r.   r8   r7   executorrespsr6   s        @r    fetch_thread_poolr<   *   sX    K5 5 5 
	4 (S$'( L( Ls   AAc                     dd l dj                  dt        dt        fddt        t           dt
        t           ffd}t        j                   ||             S )Nr   sessionr2   r/   c                    K   | j                  |      4 d {   }|j                          d {   }t        |j                  |      cd d d       d {    S 7 C7 -7 	# 1 d {  7  sw Y   y xY wwr$   )r4   r   r   status)r>   r2   r5   r   s       r    fetchzfetch_aiohttp.<locals>.fetch>   sc     <<$ 	/ 	/$DDKK.	/ 	/ 	/$	/ 	/ 	/ 	/sT   A7AA7A"AA"
A7A A7A" A7"A4(A+)A40A7r.   c           
         K   j                         4 d {   }t        j                  | D cg c]  } ||       c}  d {   cd d d       d {    S 7 Dc c}w 7 7 # 1 d {  7  sw Y   y xY wwr$   )ClientSessionasynciogather)r.   r>   r2   aiohttprA   s      r    rE   zfetch_aiohttp.<locals>.gatherC   st     ((* 	P 	Pg )N#%*=)NOO	P 	P 	P)NO	P 	P 	P 	Psa   A=AA=A(A
 A(A$A(A=A&A=A(&A=(A:.A1/A:6A=)rF   rC   r,   r   listr   rD   run)r.   rE   rF   rA   s     @@r    fetch_aiohttprI   :   sZ    /W22 / / /
P49 P(); P ;;vd|$$r"   endpointargsc                 6   t               }t        t                     D cg c]  }d| 	 }}|j                  |      }|D cg c]  }|j	                          d|  d|  }}	 t        |      }||fS c c}w c c}w # t        $ r t        |      }Y ||fS w xY w)Nrankz	/handler/?)r   ranger   	multi_getdecoderI   ImportErrorr<   )rJ   rK   storerkeysaddrsaddrr;   s           r    	fetch_allrX   J   s    E %n&6 781d1#J8D8OOD!EFKLdiz4&9LEL)e$ %< 9L  )!%(%<)s   A4A9%A> >BBblobc                 Z    t        j                  |       }t        j                  |d      S )N   )indent)r%   r&   dumps)rY   parseds     r    format_jsonr_   X   s!    ZZF::fQ''r"   a  
<!doctype html>
<head>
    <title>{% block title %}{% endblock %} - PyTorch Distributed</title>
    <link rel="shortcut icon" type="image/x-icon" href="https://pytorch.org/favicon.ico?">

    <style>
        body {
            margin: 0;
            font-family:
                -apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,
                "Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji",
                "Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";
            font-size: 1rem;
            font-weight: 400;
            line-height: 1.5;
            color: #212529;
            text-align: left;
            background-color: #fff;
        }
        h1, h2, h2, h4, h5, h6, .h1, .h2, .h2, .h4, .h5, .h6 {
            margin-bottom: .5rem;
            font-weight: 500;
            line-height: 1.2;
        }
        nav {
            background-color: rgba(0, 0, 0, 0.17);
            padding: 10px;
            display: flex;
            align-items: center;
            padding: 16px;
            justify-content: flex-start;
        }
        nav h1 {
            display: inline-block;
            margin: 0;
        }
        nav a {
           margin: 0 8px;
        }
        section {
            max-width: 1280px;
            padding: 16px;
            margin: 0 auto;
        }
        pre {
            white-space: pre-wrap;
            max-width: 100%;
        }
    </style>
</head>

<nav>
    <h1>Torch Distributed Debug Server</h1>

    <a href="/">Home</a> <!--@lint-ignore-->
    <a href="/stacks">Python Stack Traces</a> <!--@lint-ignore-->
    <a href="/pyspy_dump">py-spy Stacks</a> <!--@lint-ignore-->
    <a href="/fr_trace">FlightRecorder CPU</a> <!--@lint-ignore-->
    <a href="/fr_trace_json">(JSON)</a> <!--@lint-ignore-->
    <a href="/fr_trace_nccl">FlightRecorder NCCL</a> <!--@lint-ignore-->
    <a href="/fr_trace_nccl_json">(JSON)</a> <!--@lint-ignore-->
    <a href="/profile">torch profiler</a> <!--@lint-ignore-->
    <a href="/wait_counters">Wait Counters</a> <!--@lint-ignore-->
    <a href="/tcpstore">TCPStore</a> <!--@lint-ignore-->
</nav>

<section class="content">
  {% block header %}{% endblock %}
  {% block content %}{% endblock %}
</section>
    z
{% extends "base.html" %}
{% block header %}
  <h1>{% block title %}Index{% endblock %}</h1>
{% endblock %}
{% block content %}
Hi
{% endblock %}
    a  
{% extends "base.html" %}
{% block header %}
    <h1>{% block title %}{{title}}{% endblock %}</h1>
{% endblock %}
{% block content %}
    {% for i, (addr, resp) in enumerate(zip(addrs, resps)) %}
        <h2>Rank {{ i }}: {{ addr }}</h2>
        {% if resp.status_code != 200 %}
            <p>Failed to fetch: status={{ resp.status_code }}</p>
            <pre>{{ resp.text }}</pre>
        {% else %}
            <pre>{{ resp.text }}</pre>
        {% endif %}
    {% endfor %}
{% endblock %}
    a  
{% extends "base.html" %}
{% block header %}
    <h1>{% block title %}{{ title }}{% endblock %}</h1>
{% endblock %}
{% block content %}
    {% for i, (addr, resp) in enumerate(zip(addrs, resps)) %}
        <h2>Rank {{ i }}: {{ addr }}</h2>
        {% if resp.status_code != 200 %}
            <p>Failed to fetch: status={{ resp.status_code }}</p>
            <pre>{{ resp.text }}</pre>
        {% else %}
            <pre>{{ format_json(resp.text) }}</pre>
        {% endif %}
    {% endfor %}
{% endblock %}
    a  
{% extends "base.html" %}
{% block header %}
    <h1>{% block title %}torch.profiler{% endblock %}</h1>
{% endblock %}

{% block content %}
    <form action="" method="get">
        <label for="duration">Duration (seconds):</label>
        <input type="number" id="duration" name="duration" value="{{ duration }}" min="1" max="60">
        <input type="submit" value="Submit">
    </form>

    <script>
    function stringToArrayBuffer(str) {
        const encoder = new TextEncoder();
        return encoder.encode(str).buffer;
    }
    async function openPerfetto(data) {
        const ui = window.open('https://ui.perfetto.dev/#!/');
        if (!ui) { alert('Popup blocked. Allow popups for this page and click again.'); return; }

        // Perfetto readiness handshake: PING until we receive PONG
        await new Promise((resolve, reject) => {
        const onMsg = (e) => {
            if (e.source === ui && e.data === 'PONG') {
            window.removeEventListener('message', onMsg);
            clearInterval(pinger);
            resolve();
            }
        };
        window.addEventListener('message', onMsg);
        const pinger = setInterval(() => { try { ui.postMessage('PING', '*'); } catch (_e) {} }, 250);
        setTimeout(() => { clearInterval(pinger); window.removeEventListener('message', onMsg); reject(); }, 20000);
        }).catch(() => { alert('Perfetto UI did not respond. Try again.'); return; });

        ui.postMessage({
        perfetto: {
            buffer: stringToArrayBuffer(JSON.stringify(data)),
            title: "torch profiler",
            fileName: "trace.json",
        }
        }, '*');
    }
    </script>

    {% for i, (addr, resp) in enumerate(zip(addrs, resps)) %}
        <h2>Rank {{ i }}: {{ addr }}</h2>
        {% if resp.status_code != 200 %}
            <p>Failed to fetch: status={{ resp.status_code }}</p>
            <pre>{{ resp.text }}</pre>
        {% else %}
            <script>
            function run{{ i }}() {
                var data = {{ resp.text | safe }};
                openPerfetto(data);
            }
            </script>

            <button onclick="run{{ i }}()">View {{ i }}</button>
        {% endif %}
    {% endfor %}
{% endblock %}
    a  
{% extends "base.html" %}
{% block header %}
    <h1>{% block title %}TCPStore Keys{% endblock %}</h1>
{% endblock %}
{% block content %}
    <pre>
    {% for k, v in zip(keys, values) -%}
{{ k }}: {{ v | truncate(100) }}
    {% endfor %}
    </pre>
{% endblock %}
    ag  
{% extends "base.html" %}
{% block header %}
    <h1>{% block title %}{{ title }}{% endblock %}</h1>
{% endblock %}
{% block content %}
    <h2>Groups</h2>
    {{ groups | safe }}
    <h2>Memberships</h2>
    {{ memberships | safe }}
    <h2>Collectives</h2>
    {{ collectives | safe }}
    <h2>NCCL Calls</h2>
    {{ ncclcalls | safe }}
{% endblock %}
    aQ  
{% extends "base.html" %}
{% block header %}
    <h1>{% block title %}py-spy Stack Traces{% endblock %}</h1>
{% endblock %}
{% block content %}
    <form action="" method="get">
        <input type="checkbox" id="native" name="native" value="1"/>
        <label for="native">Native</label>
        <input type="checkbox" id="subprocesses" name="subprocesses" value="1"/>
        <label for="subprocesses">Subprocesses</label>
        <input type="submit" value="Submit">
    </form>

    {% for i, (addr, resp) in enumerate(zip(addrs, resps)) %}
        <h2>Rank {{ i }}: {{ addr }}</h2>
        {% if resp.status_code != 200 %}
            <p>Failed to fetch: status={{ resp.status_code }}</p>
            <pre>{{ resp.text }}</pre>
        {% else %}
            <pre>{{ resp.text }}</pre>
        {% endif %}
    {% endfor %}
{% endblock %}
    )z	base.html
index.htmlraw_resp.htmljson_resp.htmlprofile.htmltcpstore.htmlfr_trace.htmlpyspy_dump.htmlc                   R    e Zd ZU ej                  Zej                  ed<   dZe	ed<   y)_IPv6HTTPServeraddress_familyi   request_queue_sizeN)
r'   r(   r)   socketAF_INET6ri   AddressFamilyr+   rj   r*   r-   r"   r    rh   rh   J  s!    +1??NF((:""r"   rh   c            	       v    e Zd ZU ded<   d Zd ZdefdZdeee	e   f   fdZ
defdZd	efd
edededefdZy	)HTTPRequestHandlerFrontendServerfrontendc                 R    t         j                  d| j                  d   ||z         y )Nz%s %sr   )r   infoclient_address)r   formatrK   s      r    log_messagezHTTPRequestHandler.log_messageR  s&    "TM	
r"   c                 :    | j                   j                  |        y r$   )rq   _handle_requestr   s    r    do_GETzHTTPRequestHandler.do_GETY  s    %%d+r"   r/   c                 @    t        | j                        j                  S r$   )r	   pathr   s    r    get_pathzHTTPRequestHandler.get_path\  s    		"'''r"   c                 4    t        | j                               S r$   )r   get_raw_queryr   s    r    	get_queryzHTTPRequestHandler.get_query_  s    **,--r"   c                 @    t        | j                        j                  S r$   )r	   r{   queryr   s    r    r~   z HTTPRequestHandler.get_raw_queryb  s    		"(((r"   Nnamedefaulttypec                 J    | j                         }||vr|S  |||   d         S )Nr   )r   )r   r   r   r   r   s        r    get_query_argz HTTPRequestHandler.get_query_arge  s0      uNE$KN##r"   )r'   r(   r)   r+   rv   ry   r,   r|   dictrG   r   r~   objectr   r   r-   r"   r    ro   ro   O  so    
,(# (.4T#Y/ .)s ) ,0c$$"($7;$	$r"   ro   c                      e Zd ZdefdZddZddZdeddfdZd	e	d
e
defdZdedefdZdedefdZdedefdZdee	   dee   defdZdedefdZdedefdZdedefdZdedefdZdedefdZdedefdZdedefdZy)rp   portc                 p   t        t              }t        |d      | _        | j                  j                  j                  t        t        t               | j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                   | j"                  | j$                  d
| _        t)        dt*        fd| i      }d|f}t-        ||      | _        t1        j2                  | j4                  ddd	
      | _        | j6                  j9                          y )NT)loaderenable_async)zipr_   	enumerate)
/z/stacksz/pyspy_dumpz	/fr_tracez/fr_trace_jsonz/fr_trace_ncclz/fr_trace_nccl_jsonz/profilez/wait_countersz	/tcpstorero   rq    r-   z distributed.debug.FrontendServer)targetrK   daemonr   )r
   	templatesr   
_jinja_envglobalsupdater   r_   r   _handle_index_handle_stacks_handle_pyspy_dump_handle_fr_trace_handle_fr_trace_json_handle_fr_trace_nccl_handle_fr_trace_nccl_json_handle_profiler_handle_wait_counters_handle_tcpstore_routesr   ro   rh   _server	threadingThread_serve_threadstart)r   r   r   RequestHandlerClassserver_addresss        r    __init__zFrontendServer.__init__o  s   I&%V$G&&# 	' 	
 ##**22.."88"88#'#B#B--"88..
 # !
 d&~7JK '';;3	
 	r"   r/   Nc                     	 | j                   j                          y # t        $ r t        j	                  d       Y y w xY w)Nz got exception in frontend server)r   serve_forever	Exceptionr   	exceptionr   s    r    r   zFrontendServer._serve  s8    	ALL&&( 	A?@	As    >>c                 8    | j                   j                          y r$   )r   joinr   s    r    r   zFrontendServer.join  s    r"   reqc                    |j                         }|| j                  vr|j                  dd|        y | j                  |   }	  ||      }|j                  d       |j                  dd       |j                          |j                  j                  |       y # t        t        f$ r>}t
        j                  d|       |j                  ddt        |              Y d }~y d }~ww xY w)	Ni  zHandler not found: z-Exception in frontend server when handling %si  zException: r   zContent-typez	text/html)r|   r   
send_errorr   
SystemExitr   r   reprsend_responsesend_headerend_headerswfilewrite)r   r   r{   handlerr5   es         r    rx   zFrontendServer._handle_request  s    ||~t||#NN3"5dV <=,,t$		3<D 	#4		 :& 	? NN3+d1gY 78	s   B C)+4C$$C)templatekwargsc                 t     | j                   j                  |      j                  di |j                         S )Nr-   )r   get_templaterenderencode)r   r   r   s      r    _render_templatezFrontendServer._render_template  s0    <t++H5<<FvFMMOOr"   c                 $    | j                  d      S )Nr`   )r   )r   r   s     r    r   zFrontendServer._handle_index  s    $$\22r"   c                 H    t        d      \  }}| j                  dd||      S )Ndump_tracebackra   StackstitlerV   r;   rX   r   r   r   rV   r;   s       r    r   zFrontendServer._handle_stacks  s2     !12u$$85 % 
 	
r"   c                 d    t        d|j                               \  }}| j                  d||      S )N
pyspy_dumprf   rV   r;   )rX   r~   r   r   s       r    r   z!FrontendServer._handle_pyspy_dump  s<     s/@/@/BCu$$ % 
 	
r"   rV   r;   c                    t               }|j                  g       }d|_        d|_        i }t	        |      D ]?  \  }}|j                          |||   d|j                         }d|vrg |d<   ||d| d<   A t        t        |j                                     d   }	t        |||	      }
| j                  dd	t        |
j                  t        j                  d
      t        |
j                   t"        j                  d
      t        |
j$                  t&        j                  d
      t        |
j(                  t*        j                  d
            S )N)rK   T)rM   	host_nameentriesrM   z.jsonversionre   FlightRecorderhtml)headerstablefmt)r   groupsmembershipscollectives	ncclcalls)r   
parse_argsallow_incomplete_ranksverboser   r!   r%   nextitervaluesr   r   r   r   r   _fieldsr   r   r   r   r   r   )r   rV   r;   configrK   detailsrM   r5   dumpr   dbs              r    _render_fr_tracezFrontendServer._render_fr_trace  sE     b )&*##E* 		/JD$!!#"4[ ))+D
 $"$Y*.Gd4&&'		/ tGNN,-.y9gtW-$$"BIIu}}vN 
(:(:V !
(:(:V r||X5E5EPVW % 
 	
r"   c                 T    t        d      \  }}| j                  |t        |            S )Nfr_trace_jsonrX   r   rG   r   s       r    r   zFrontendServer._handle_fr_trace  s'     1u$$UDK88r"   c                 H    t        d      \  }}| j                  dd||      S )Nr   rb   r   r   r   r   s       r    r   z$FrontendServer._handle_fr_trace_json  s4     1u$$"	 % 
 	
r"   c                 V    t        dd      \  }}| j                  |t        |            S )Ndump_nccl_trace_jsononlyactive=truer   r   s       r    r   z$FrontendServer._handle_fr_trace_nccl   s+     !79JKu$$UDK88r"   c                 J    t        dd      \  }}| j                  dd||      S )Nr   r   rb   zFlightRecorder NCCLr   r   r   s       r    r   z)FrontendServer._handle_fr_trace_nccl_json  s8     !79JKu$$'	 % 
 	
r"   c                 ~    |j                  ddt              }t        dd|       \  }}| j                  d||      S )Ndurationg      ?)r   r   torch_profilez	duration=rc   r   )r   floatrX   r   )r   r   r   rV   r;   s        r    r   zFrontendServer._handle_profiler  sH    $$Z5$I IhZ2HIu$$^5$NNr"   c                 H    t        d      \  }}| j                  dd||      S )Nwait_counter_valuesrb   zWait Countersr   r   r   s       r    r   z$FrontendServer._handle_wait_counters  s2     !67u$$O5 % 
 	
r"   c                     t        d      }|j                         }|j                          |j                  |      D cg c]  }t	        |       }}| j                  d||      S c c}w )Nr   )prefixrd   )rU   r   )r   	list_keyssortrP   r   r   )r   r   rS   rU   vr   s         r    r   zFrontendServer._handle_tcpstore  s^    r* 		#(??4#89a$q'99$$_4$OO :s    A()r/   N)r'   r(   r)   r*   r   r   r   ro   rx   r,   r   bytesr   r   r   r   rG   r   r   r   r   r   r   r   r   r   r-   r"   r    rp   rp   n  s=   (S (TA#5 $ .P P P5 P3!3 3 3
"4 
 

&8 
U 
"
d3i "
X "
5 "
H9$6 95 9

); 
 
9); 9 9

.@ 
U 
O$6 O5 O
); 
 
P$6 P5 Pr"   rp   r   c                     t         j                  t        j                         t	        |       }t         j                  d|j                  j                         |j                          y )N)r   z"Frontend server started on port %d)	r   setLevelloggingINFOrp   rs   r   server_portr   )r   servers     r    mainr  $  s>    
OOGLL!&F
KK4fnn6P6PQ
KKMr"   )r   )4rD   r%   r   rk   r   collections.abcr   concurrent.futuresr   dataclassesr   http.serverr   r   urllib.parser   r	   jinja2r
   r   r   torch.distributed.debug._storer   r   4torch.distributed.flight_recorder.components.builderr   ;torch.distributed.flight_recorder.components.config_managerr   2torch.distributed.flight_recorder.components.typesr   r   r   r   	getLoggerr'   r   Loggerr+   r   rG   r,   r<   rI   tuplerX   r_   r   rh   ro   rp   r*   r  r-   r"   r    <module>r     sY        $ 1 ! C + *  J I Q  +**84 4 	% 	% 	%DI (8*<  %S	 %hx&8 %  3 d3i(AS6S0T (c (GP""?@ cj	Z#) #
$/ $>sP sPls t r"   