将嵌套列表合并到 DataFrame 的单列时，结果中被填充了 None 值

def apwords(words):
    """Return the tokens produced by ``word_tokenize(words)`` as a list."""
    return list(word_tokenize(words))


# Alias kept so existing references to ``addwords`` keep working.
addwords = apwords

# Tokenize every address and materialize the result as a list of token lists.
clean = list(data['Clean_addr'].apply(addwords))

# Presumably gensim's Phrases/Phraser (imports not shown here): fit a phrase
# model on the token lists, then wrap it for application.
bigram = Phrases(clean, min_count=150, threshold=2)
bigrams = Phraser(bigram)

# Apply the phrase model to each token list.
x = [bigrams[tokens] for tokens in clean]

# NOTE: the token lists have different lengths, so the DataFrame pads the
# shorter rows with None; astype(str) then turns that padding into the literal
# text 'None' in the joined string.
y = pd.DataFrame(x)
data['Phrases_Clean_Addr'] = y.apply(
    lambda row: ' '.join(row.astype(str)), axis=1
)

[['robeco', 'des','voeux', 'rd','central','f','man','yee','building','room','central'], ['nikko','asset','management','hk','limi','f','man','yee','building','des','voeux','rd','central'], ['cfa','institute','office','f','man','yee','building','des','voeux','rd','central'], ['victon','registrations','ltd','room','f','regent','centre','queens','rd','central','central'], ['ding','fung','ltd','room','crawford','house','queens','rd','central','central'], ['quam','ltd','queens','rd','central','th','th','floors','china','building'], ['f', 'des', 'voeux', 'rd', 'central'], ['f', 'wincome', 'centre', 'des', 'voeux', 'rd', 'central'], ['ags', 'f', 'chuangs', 'tower', 'connaught', 'rd', 'central']]

robeco des_voeux rd central f man yee building room central None None None None None None None None None None nikko asset management hk limi f man yee building des_voeux rd central None None None None None None None None cfa institute office f man yee building des_voeux rd central None None None None None None None None None None victon registrations ltd room f regent centre queens_rd central central None None None None None None None None None None ding fung ltd room crawford house queens_rd central central None None None None None None None None None None None quam ltd queens_rd central th th floors china building None None None None None None None None None None None canara bank aon china bldng queens_rd centeal central None None None None None None None None None None None None gia room f aon china building queens_rd central None None None None None None None None None None None None zaaba capital ltd_unit b f china building queens_rd central None None None None None None None None None None None firestar diamond hk nd_floor new henry house ice house rd None None None None None None None None None None

robeco des_voeux rd central f man yee building room central nikko asset management hk limi f man yee building des_voeux rd central cfa institute office f man yee building des_voeux rd central victon registrations ltd room f regent centre queens_rd central central ding fung ltd room crawford house queens_rd central central quam ltd queens_rd central th th floors china building

1条回答

网友
1楼 · 发布于 2024-06-08 23:20:55

这是预期的行为，因为您是用长度不等的嵌套列表（列表的列表）来创建 DataFrame 的。在您的示例中，x 中最长的列表有 13 个元素，因此 DataFrame y 包含 13 列；对于元素少于 13 个的行，多出的位置都会被填充为缺失值（None/NaN）。
要获得期望的输出，只需在传给 apply 的 lambda 中先对每一行调用 dropna() 去掉缺失值，然后再拼接：
# Strip each row's missing-value padding before joining its tokens with spaces.
data['Phrases_Clean_Addr'] = y.apply(
    lambda row: ' '.join(row.dropna().astype(str)),
    axis=1,
)
所以完整的解决方案是
# Token lists of unequal length (between 5 and 13 tokens each).
x = [
    ['robeco', 'des', 'voeux', 'rd', 'central', 'f', 'man', 'yee', 'building', 'room', 'central'],
    ['nikko', 'asset', 'management', 'hk', 'limi', 'f', 'man', 'yee', 'building', 'des', 'voeux', 'rd', 'central'],
    ['cfa', 'institute', 'office', 'f', 'man', 'yee', 'building', 'des', 'voeux', 'rd', 'central'],
    ['victon', 'registrations', 'ltd', 'room', 'f', 'regent', 'centre', 'queens', 'rd', 'central', 'central'],
    ['ding', 'fung', 'ltd', 'room', 'crawford', 'house', 'queens', 'rd', 'central', 'central'],
    ['quam', 'ltd', 'queens', 'rd', 'central', 'th', 'th', 'floors', 'china', 'building'],
    ['f', 'des', 'voeux', 'rd', 'central'],
    ['f', 'wincome', 'centre', 'des', 'voeux', 'rd', 'central'],
    ['ags', 'f', 'chuangs', 'tower', 'connaught', 'rd', 'central'],
]

# One row per token list; rows shorter than the longest list (13 tokens) are
# padded with missing values.
y = pd.DataFrame(x)

# Drop the padding from each row, then join the remaining tokens with spaces.
z = y.apply(lambda row: ' '.join(row.dropna().astype(str)), axis=1)

# >>> z.values
# array(['robeco des voeux rd central f man yee building room central',
#        'nikko asset management hk limi f man yee building des voeux rd central',
#        'cfa institute office f man yee building des voeux rd central',
#        'victon registrations ltd room f regent centre queens rd central central',
#        'ding fung ltd room crawford house queens rd central central',
#        'quam ltd queens rd central th th floors china building',
#        'f des voeux rd central',
#        'f wincome centre des voeux rd central',
#        'ags f chuangs tower connaught rd central'], dtype=object)

相关问题更多 >

编程相关推荐

热门问题

热门文章