o
    Rgi=                     @   s   d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dlmZ	 ddl
mZ eeZeje dZdZe	jd	d
dd ZdS )    )	getLoggerN)datetime)
celery_app   )query_xai_apizintelligence.jsonhomogenization_statusz:keepa_deals.maintenance_tasks.homogenize_intelligence_task)namec                  C   s  t jtjj} | tt	dddd t
dt   t
dt  tjtsEdt }t
| | tt	d|d	 dS zttd
dd}t|}W d   n1 s^w   Y  |st| tt	ddd W dS i }|D ]&}t|trd|v r|d|t|d  < qxt|trd|t| < qxg }|D ] }t|trd|v r|t|d   q|t|  qd}t|}g }	t
d| d td||D ]}
||
|
|  }|
| d }|| d | }| tt	dd| d| ddd dt	| d}dddd|dgdd d!d"}t|}d#|v r;t
d$|
 d%|d#   |	| qzI|d& d d' d  }tjd(d)|tj d*}t!|}t|t"ru|	| t
d+|
 d,t| d-t|  nt
d.|
 d/ |	| W q tj#t$t%fy } zt
d0|
 d%|  |	| W Y d}~qd}~ww g }t&' (d1}|	D ]}t| }||}|||r|n|d2 qt|}|| }t
d3| d4| d5|  |dkr5t
d6t  z'ttd7dd}tj)||d8d9 W d   n	1 sw   Y  t
d: W n t*y4 } z
t
d;|  |d}~ww t
d< | tt	d|d=| d>d? |W S  t*yr } zt
d@|  | tt	dt|d	 |d}~ww )Az:Background task to homogenize intelligence.json using LLM.RunningzStarting...r   )statusprogressremoved_countz"Homogenization Task Started. CWD: zTarget Intelligence File: zFile not found: Error)r
   messagerzutf-8)encodingNComplete)r
   r   content
date_addedi  zStarting processing for z	 items...r   zProcessing batch z of z...a9  
            You are a strict data cleaner. Below is a JSON list of "intelligence" items.

            **CRITICAL INSTRUCTIONS:**
            1. Aggressively identify concepts that mean the same thing, even if phrased differently.
            2. Merge them into a SINGLE, concise entry.
            3. If two items share >50% conceptual overlap, KEEP ONLY THE BEST ONE.
            4. Your goal is to REDUCE the list size by removing redundancy.
            5. Return ONLY the final JSON list of strings. No markdown, no intro.

            **Input List:**
            z
            systemzYou are a data cleaner.)roler   userzgrok-4-fast-reasoningFg?)messagesmodelstreamtemperatureerrorz"xAI Error in homogenization chunk z: choicesr   z^```json\s*|\s*```$ )flagszChunk z
: Reduced z -> z0Homogenization returned non-list JSON for chunk .z0Error parsing homogenization response for chunk z%Y-%m-%d)r   r   z#Homogenization complete. Original: z	, Final: z, Removed: zWriting updated list to file: w   )indentzFile write successful.z#Failed to write intelligence file: z#No items removed. File not updated.zComplete! Semantically merged z duplicate ideas.)r
   r   r   zError in homogenization task: )+redisRedisfrom_urlceleryconf
broker_urlsetHOMOGENIZATION_STATUS_KEYjsondumpsloggerinfoosgetcwdINTELLIGENCE_FILEpathexistsr   openload
isinstancedictgetstrstripappendlenranger   extendresub	MULTILINEloadslistJSONDecodeErrorKeyError
IndexErrorr   nowstrftimedump	Exception)redis_client	error_msgfintelligencecontent_date_mapitemintelligence_content_list
CHUNK_SIZEtotal_originalall_cleaned_stringsichunkcurrent_chunk_numtotal_chunkspromptpayloadresultr   cleaned_chunkefinal_objects_list	today_strss_cleanoriginal_datefinal_countremoved	write_err rf   8/var/www/agentarbitrage/keepa_deals/maintenance_tasks.pyhomogenize_intelligence_task   s   











&






rh   )loggingr   r/   r+   r#   r?   r   workerr   r&   ava_advisorr   __name__r-   r2   joinr0   r1   r*   taskrh   rf   rf   rf   rg   <module>   s    
