我想从简历(CV/Resume)中提取特定部分

2024-03-28 23:35:42 发布

您现在位置:Python中文网/ 问答频道 /正文

我想从简历(CV/Resume)中提取某个特定的部分,比如教育经历、工作经验。我写了下面的代码,但是当要提取的部分位于简历的最后(后面没有其他章节标题)时,它就不起作用了。

# Heading words (German and English) that open the work-experience section.
# Tokens are compared case-sensitively, hence the explicit spelling variants.
_EXPERIENCE_HEADINGS = frozenset({
    "Erfahrung", "ERFAHRUNG",
    "Erfahrungen", "ERFAHRUNGEN",
    "Laufbahn", "LAUFBAHN",
    "Praktische", "PRAKTISCHE",
    "Praktika", "PRAKTIKA",
    "Berufserfahrung", "BERUFSERFAHRUNG",
    "BERÜFSERFAHRUNG",  # misspelling kept from the original list
    "Experience", "EXPERIENCE",
})

# Heading words of every *other* CV section; the first one found after the
# experience heading marks where the experience section ends.
_OTHER_SECTION_HEADINGS = frozenset({
    "FÄHIGKEITEN", "Fahigkeiten",
    "KENNTNISSE", "Kenntnisse",
    "AUSBILDUNG", "Ausbildung",
    "BILDUNG", "Bildung",
    "HOBBIES", "Hobbies",
    "Personliche",
    "EHRENAMTLICHES", "Ehrenamtliches",
    "ENGAGEMENT", "Engagement",
    "ENGAGEEMENT",  # misspelling kept from the original list
    "SPRACHEN", "Sprachen",
    "EDUCATION", "Education",
    "HOCHSCHUL", "Hochschul",
    "STUDIUM", "Studium",
    "SPRACHKURSE", "Sprachkurse",
    "COMPUTERKENNTNISSE", "Computerkenntnisse",
    "AWARDS", "Awards",
    "PERSONAL", "Personal",
    "INFORMATION", "Information",
    "SKILLS", "Skills", "SKILL", "Skill",
    "Soziales",
})


def _experience_span(words):
    """Return ``(start, end)`` token indices of the experience section.

    Returns ``None`` when no experience heading occurs in *words*.
    """
    # Same selection rule as the original code: the heading synonym seen last
    # in the document decides, and the section starts right after that
    # synonym's first occurrence. This lets a two-word heading such as
    # "Praktische Erfahrung" start after its second word.
    heading = None
    for word in words:
        if word in _EXPERIENCE_HEADINGS:
            heading = word
    if heading is None:
        return None

    start = words.index(heading) + 1

    # Advance to the next section heading, but never past the last token:
    # when the experience section is the final one in the CV there is no
    # following heading and the section simply runs to the end.
    end = start
    while end < len(words) and words[end] not in _OTHER_SECTION_HEADINGS:
        end += 1
    return start, end


def extract_experience(ex_cl):
    """Extract the work-experience section of a PDF CV as one string.

    The PDF is opened with ``fitz.open``, the text of all pages is
    concatenated and tokenized with ``nltk.word_tokenize``, and the tokens
    between the experience heading and the next known section heading are
    returned.

    Args:
        ex_cl: Path of the PDF file, passed unchanged to ``fitz.open``.

    Returns:
        The section's tokens joined by single spaces. An empty string is
        returned when the document contains no experience heading or when
        another section heading follows it immediately.

    Note:
        Headings are matched as single case-sensitive tokens, so an ordinary
        word from the stop list (e.g. "Personal", "Skills") inside the
        experience text ends the section early.
    """
    doc = fitz.open(ex_cl)
    try:
        page_texts = []
        for page in doc:
            # Prefer the current PyMuPDF name and fall back to the legacy
            # camelCase alias used by the original code.
            read_text = getattr(page, "get_text", None) or page.getText
            page_texts.append(str(read_text()))
    finally:
        # Release the file handle even if text extraction fails.
        doc.close()

    words = nltk.word_tokenize("".join(page_texts))

    span = _experience_span(words)
    if span is None:
        return ""
    start, end = span
    return " ".join(words[start:end])

# Example invocation on the file '020.pdf'. The returned section text is not
# assigned or printed here, so it is only visible in an interactive session.
extract_experience('020.pdf')

`