a
    f}rc                     @   s   d Z ddlZddlZddlZddlZddlZddlZddlZg dZdd Z	dd Z
dd	 Zd
d Zdd Zdd Zg g fddZdd ZdS )z%Generate, sanitize and parse surveys.    N) ResponsezOpen-Ended Responsec                 C   sN   t jdd| }tjt j|ddd}|d}tt j|d}||fS )z>Gets survey dataframe from .csv while dropping hidden columns.dbsurveys
survey.csvF)	index_colr   zproperties.json)ospathjoinpandasread_csvfillnautilssafejsonload)namebasesurvey
properties r   /var/www/vevn/surveyparser.py
get_survey	   s
    
r   c                 C   s    t jdd| d}t|}|S )Nr   r   metadata.jsonr   r	   r
   r   r   )r   r	   metadatar   r   r   get_metadata   s    
r   c                 C   s    t jdd| d}t|}|S )Nr   r   zreadings.jsonr   )r   r	   readingsr   r   r   get_readings   s    
r   c                 C   s   |D ]V}||d v rBdd ||  d }| j| | j| } q| j| | ||  } qt| j|d k r~| j| jdd | S )z6Filter survey given desired answers for each question.touchpoint-columnsz(?=.*z)(?=.*)zanon-matchesT)inplace)r
   locstrcontainsisinlenindexdrop)r   r   r   selected_answerscolumnZall_answers_regexr   r   r   filter_survey   s    r)   c                 C   s   t dd| } |  } | S )Nz + )resubstrip)stringr   r   r   sanitize.   s    r/   c                 C   s   d td | |   tS )N;)r
   filternotnullastyper!   xr   r   r   join_not_null3   s    r6   c                    s   fdd|D }g }t | dddd(}tj|ddd	}t|}W d
   n1 sT0    Y  i }tt|d D ]}t|d | |d |< t|d | |d |< |dkrd
n|d |d  }|d | dkr||d |< |d | tv r|d | |d |< |d | |kr8||d |  |d |  n|d | g||d | < |d | |v rr|d | |d |< qr|dd
 }|	d}	t
j||	d}
|
jddd}
|
j ddd}
 D ]}|	| q|D ]:}|
| jddddd }|
j|gdd}
||
|< qi }t|
D ]2\}}||vr|
jd
d
|f   ||< q|D ]}|| ||< |g||< qT|||d}|
|fS )zNParses and returns the survey as a pandas dataframe along with some
	metadata.c                    s   g | ]}| vr|qS r   r   ).0cdeletedr   r   
<listcomp>:       zparse_file.<locals>.<listcomp>rr   zutf-8)newlineencoding,")	delimiter	quotecharNr      )columnsall)howaxisignore)errorsrH   )levelrH   c                 S   s   | j tddS )NrD   rH   )applyr6   r4   r   r   r   <lambda>s   r<   zparse_file.<locals>.<lambda>rL   )
categoriesanswersr   )opencsvreaderlistranger$   r/   BLANKSappendpopr   	DataFramedropnar&   groupbyrM   	enumerateilocuniquetolist)sourceZtouchpointsr:   r   Zraw_filerawrO   Zcolumn_iZprev_categoryheadersZ	survey_dfr(   categorymergedrP   iZ
touchpointr   r   r9   r   
parse_file6   sT    & 	

"rf   c                 C   sF   t j|dd | jt j|dddd t j|d}t|| dS )	z%Writes parsed surveys to destination.T)exist_okr   Fzw+)r%   moder   r   )r   makedirsto_csvr	   r
   r   safejsondump)r   destinationr   r	   r   r   r   write   s
    rm   )__doc__r   rR   jsonr   tempfiler   r+   rV   r   r   r   r)   r/   r6   rf   rm   r   r   r   r   <module>   s   8	T