Pandas Initiation

1 minute read

Published:

Import packages


import numpy as np
import pandas as pd
import os
from astropy.table import Table

from scipy.stats import spearmanr#
from scipy.stats.stats import pearsonr

from astropy.table import Table
from astropy.cosmology import FlatLambdaCDM,Planck13,Planck15,z_at_value
from astropy import units as u
from astropy.cosmology import LambdaCDM
cosmo = LambdaCDM(H0=70, Om0=0.27, Ode0=0.73)
from astropy.time import Time
from astropy.time import TimeYearDayTime


from datetime import datetime
import time
from time import strftime,strptime
import calendar
from dateutil.parser import parse

#from adjustText import adjust_text
import matplotlib as mpl
import matplotlib.pyplot as plt
from pylab import cm
from collections import OrderedDict
#from adjustText import adjust_text

%matplotlib inline
%config InlineBackend.figure_format='svg'
# Edit the font, font size, and axes width
mpl.rcParams['font.family'] = 'Times New Roman' #'Avenir'
plt.rcParams['font.size'] = 18
plt.rcParams['axes.linewidth'] = 2

Define sort_data helper functions

import pandas as pd
import numpy as np
import os

def get_obsids(path):
    """Return the sorted names of the entries in *path* made up only of digits.

    Entries are the names returned by ``os.listdir(path)``; they are compared
    and sorted as strings, not as numbers.
    """
    return sorted(entry for entry in os.listdir(path) if entry.isdigit())

def drop_index(data):
    """Return *data* with a fresh default index; the old index is discarded.

    The input object itself is not modified.
    """
    return data.reset_index(drop=True)

def get_info(data,label,label_err=None):
    """Return ``(minimum, maximum, mean)`` of ``data[label]``.

    ``label_err`` is accepted but not used by this function.
    """
    column = data[label]
    lowest = min(column)
    highest = max(column)
    return lowest, highest, np.mean(column)
    

Loading data with pandas

# --- CSV file: comma-separated, first row holds the column names ---
data_path='/path/data.csv'
data=pd.read_csv(data_path,header=0,delimiter=',')
print(data.columns)

# --- Text file: whitespace-separated, no header row ---
# The separator is a regular expression, so it is written as a raw string.
data_path='/path/data.txt'
data=pd.read_csv(data_path,header=None,delimiter=r'\s+')
#data.columns=[]  # assign column names here when the file has no header

# --- Excel workbook: read one named sheet, first row is the header ---
data_path='/path/data.xlsx'
data=pd.read_excel(data_path,header=0,sheet_name='sheet1')


# --- CDS-format table: the data file is described by a separate ReadMe ---
data_path='/path/data.dat'
readme_path='/path/readme'

Tabledata=Table.read(data_path,
                      readme=readme_path,
                      format="ascii.cds",)

# --- HTML page: pd.read_html returns a list of DataFrames, one per table ---
html_url='website_url'
data_table_html=pd.read_html(html_url)



# --- .dat text file: whitespace-separated, first row is the header ---
# This is a delimited text file, so it is read with read_csv (read_excel
# neither parses text files nor accepts a delimiter argument).
data_path='/path/data.dat'
data=pd.read_csv(data_path,header=0,delimiter=r'\s+')

Writing data with pandas

# Save the DataFrame as CSV without writing the index column.
data.to_csv(path_or_buf=data_path, index=False)

# Save the DataFrame as an Excel workbook (default options).
data.to_excel(excel_writer=data_path)

Saving and loading a dict with pickle (the file is named .json but contains pickle data)

from __future__ import unicode_literals
import json
import numpy as np
import pickle

# Write the dict to disk. The file name ends in ".json", but the content is
# binary pickle data, hence the "wb" mode.
with open("Source_Name_dict_pickle.json", "wb") as f:  # write dict
    pickle.dump(Source_Name_dict, f, protocol=pickle.HIGHEST_PROTOCOL)


# Read the dict back from the same file.
with open('Source_Name_dict_pickle.json', "rb") as f:  # load dict
    Source_Name_dict_load_pickle = pickle.load(f)

# Reverse lookup: swap keys and values of the loaded dict.
Source_Name_dict_load_pickle_Name2url = {
    value: key for key, value in Source_Name_dict_load_pickle.items()
}

# Write the original dict once more to the same file.
with open("Source_Name_dict_pickle.json", "wb") as f:  # write dict
    pickle.dump(Source_Name_dict, f, protocol=pickle.HIGHEST_PROTOCOL)
    

Table

# `fits` is used throughout this section, so import it here explicitly.
from astropy.io import fits
from astropy.table import Table
import numpy as np

# Wrap `newdata` and `hdr` in a binary-table HDU named 'data' and write it,
# replacing any existing file.
hdu = fits.BinTableHDU(data=newdata,header=hdr,name='data')
hdu.writeto(path+'qsopar.fits',overwrite=True)

# Open an existing GTI file and keep its primary header and table data.
gtipath='/Volumes/Brettlv_G_m/Astro_sources_data/V404cyg/V404cyg//nustar_v404/90102007002/pipeline_out/V404cygA01_gti.fits'
gti=fits.open(gtipath)
headergti=gti[0].header

gtitime=gti[1].data
# Build a replacement table with two float64 columns, START and STOP.
gti_made = np.rec.array([(1,2),(3,5),(12,100)],
                      formats='float64,float64',
                      names='START,STOP')
# Swap the new table into extension 1 and save under a new file name.
# NOTE: no overwrite flag is passed, so this is expected to fail if
# 'newgti.fits' already exists.
gti[1].data=gti_made
gti.writeto('newgti.fits')



# Read the table written above back in and show its first 12 rows.
tbdata = Table.read(path+"qsopar.fits")
tbdata[:12]


get_quasi_sim_data

def get_quasi_sim_lum_correlation(time1,Lum1,time2,Lum2,timebin=1):
    """Pair each point of series 1 with the nearest-in-time point of series 2.

    For every ``(time, value)`` pair in ``(time1, Lum1)`` the entry of
    ``time2`` closest to that time is located. The pair is kept only when the
    absolute time difference is strictly smaller than ``timebin``.

    Parameters
    ----------
    time1, Lum1 : sequences of equal length
        Reference times and their values.
    time2, Lum2 : sequences of equal length
        Times and values to match against. Lists, NumPy arrays and pandas
        Series are accepted; they are always indexed by position.
    timebin : number, optional
        Maximum allowed time separation (exclusive) for a match.

    Returns
    -------
    tuple of four numpy.ndarray
        ``(matched_time1, matched_Lum1, matched_time2, matched_Lum2)``, all of
        the same length. The arrays are empty when nothing matches or when
        ``time2`` is empty.
    """
    # Convert once so subtraction works for lists and so indexing is
    # positional even for a pandas Series with a non-default index.
    time2 = np.asarray(time2)
    Lum2 = np.asarray(Lum2)

    time1s = []
    Lum1s = []
    time2s = []
    Lum2s = []

    # argmin is undefined on an empty array; with no candidates, no matches.
    if time2.size:
        for time_, lum_ in zip(time1, Lum1):
            nearest_i = np.argmin(np.abs(time2 - time_))
            nearest_time = time2[nearest_i]

            if abs(nearest_time - time_) < timebin:
                time1s.append(time_)
                Lum1s.append(lum_)
                time2s.append(nearest_time)
                Lum2s.append(Lum2[nearest_i])

    return np.array(time1s), np.array(Lum1s), np.array(time2s), np.array(Lum2s)
    
def get_lag_correlation(t1,y1,t2,y2,lagtime,timebin=1):
    """Match series 1 against series 2 after shifting series 2 by a lag.

    ``lagtime`` is subtracted from ``t2`` and the shifted series is passed to
    ``get_quasi_sim_lum_correlation`` with the same ``timebin``. The four
    values it returns are passed back unchanged; the returned series-2 times
    are therefore the shifted ones.
    """
    return get_quasi_sim_lum_correlation(t1, y1, t2 - lagtime, y2, timebin=timebin)
    
        
def get_quasi_sim_lum3(time1,Lum1,time2,Lum2,time3,Lum3,timebin=1):#base on time1
    """Match series 2 and series 3 to series 1 by nearest time.

    For every ``(time, value)`` pair in ``(time1, Lum1)`` the closest entry of
    ``time2`` and the closest entry of ``time3`` are located. The triple is
    kept only when both time differences are strictly smaller than
    ``timebin``.

    Parameters
    ----------
    time1, Lum1 : sequences of equal length
        Reference times and their values.
    time2, Lum2, time3, Lum3 : sequences
        Times and values to match against series 1. Lists, NumPy arrays and
        pandas Series are accepted; they are always indexed by position.
    timebin : number, optional
        Maximum allowed time separation (exclusive) for a match.

    Returns
    -------
    tuple of six numpy.ndarray
        ``(time1, Lum1, time2, Lum2, time3, Lum3)`` restricted to the matched
        points, all of the same length. The arrays are empty when nothing
        matches or when ``time2`` or ``time3`` is empty.
    """
    # Convert once so subtraction works for lists and so indexing is
    # positional even for a pandas Series with a non-default index.
    time2 = np.asarray(time2)
    Lum2 = np.asarray(Lum2)
    time3 = np.asarray(time3)
    Lum3 = np.asarray(Lum3)

    time1s = []
    Lum1s = []
    time2s = []
    Lum2s = []
    time3s = []
    Lum3s = []

    # argmin is undefined on an empty array; with no candidates, no matches.
    if time2.size and time3.size:
        for time_, lum_ in zip(time1, Lum1):
            nearest_i2 = np.argmin(np.abs(time2 - time_))
            nearest_time2 = time2[nearest_i2]

            nearest_i3 = np.argmin(np.abs(time3 - time_))
            nearest_time3 = time3[nearest_i3]

            if abs(nearest_time2 - time_) < timebin and abs(nearest_time3 - time_) < timebin:
                time1s.append(time_)
                Lum1s.append(lum_)
                time2s.append(nearest_time2)
                Lum2s.append(Lum2[nearest_i2])
                time3s.append(nearest_time3)
                Lum3s.append(Lum3[nearest_i3])

    return (np.array(time1s), np.array(Lum1s),
            np.array(time2s), np.array(Lum2s),
            np.array(time3s), np.array(Lum3s))
     

欢迎关注微信公众号:曜灵集