{"id":10918,"date":"2024-04-06T09:17:06","date_gmt":"2024-04-06T16:17:06","guid":{"rendered":"https:\/\/mattfife.com\/?p=10918"},"modified":"2024-03-16T14:42:39","modified_gmt":"2024-03-16T21:42:39","slug":"emote-portrait-alive-generating-expressive-portrait-videos-with-audio2video-diffusion-model-under-weak-conditions","status":"publish","type":"post","link":"https:\/\/mattfife.com\/?p=10918","title":{"rendered":"Emote Portrait Alive: Generating Expressive Portrait Videos with Audio2Video Diffusion Model under Weak Conditions"},"content":{"rendered":"\n<p class=\"wp-block-paragraph\">EMO is an audio-driven portrait-to-video generation framework. Input a single reference image and vocal audio, e.g. talking or singing, and it generates vocal avatar videos with expressive facial expressions and various head poses. It can generate videos of any duration, depending on the length of the input audio.<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">You can make still images talk or sing, or perform cross-actor operations in which still images deliver the performances of other actors or speak in different languages.<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">It reminds me of other AI facial animation software like <a href=\"https:\/\/www.myheritage.com\/deep-nostalgia\" data-type=\"link\" data-id=\"https:\/\/www.myheritage.com\/deep-nostalgia\">MyHeritage&#8217;s Deep Nostalgia<\/a>.<\/p>\n\n\n\n<figure class=\"wp-block-embed is-type-video is-provider-youtube wp-block-embed-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"wp-block-embed__wrapper\">\n<span class=\"embed-youtube\" style=\"text-align:center; display: block;\"><iframe loading=\"lazy\" class=\"youtube-player\" width=\"640\" height=\"360\" src=\"https:\/\/www.youtube.com\/embed\/VlJ71kzcn9Y?version=3&#038;rel=1&#038;showsearch=0&#038;showinfo=1&#038;iv_load_policy=1&#038;fs=1&#038;hl=en-US&#038;autohide=2&#038;wmode=transparent\" allowfullscreen=\"true\" style=\"border:0;\" sandbox=\"allow-scripts allow-same-origin allow-popups 
allow-presentation allow-popups-to-escape-sandbox\"><\/iframe><\/span>\n<\/div><\/figure>\n\n\n\n<p class=\"wp-block-paragraph\">Project: <a href=\"https:\/\/humanaigc.github.io\/emote-portrait-alive\/\">https:\/\/humanaigc.github.io\/emote-portrait-alive\/<\/a><\/p>\n\n\n\n<p class=\"wp-block-paragraph\">GitHub: <a href=\"https:\/\/github.com\/HumanAIGC\/EMO\">https:\/\/github.com\/HumanAIGC\/EMO<\/a><\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n","protected":false},"excerpt":{"rendered":"<p>EMO is an audio-driven portrait-to-video generation framework. Input a single reference image and vocal audio, e.g. talking or singing, and it generates vocal avatar videos with expressive facial expressions and various head poses. It can generate videos of any duration, depending on the length of the input audio. You can make still images talk or sing, or perform cross-actor operations in which still images deliver the performances of other actors or speak in different languages. It reminds me of other AI facial animation software like MyHeritage&#8217;s Deep Nostalgia&#8230;.<\/p>\n<p class=\"read-more\"><a class=\"btn btn-default\" href=\"https:\/\/mattfife.com\/?p=10918\"> Read More<span class=\"screen-reader-text\">  Read 
More<\/span><\/a><\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":"","jetpack_publicize_message":"","jetpack_publicize_feature_enabled":true,"jetpack_social_post_already_shared":true,"jetpack_social_options":{"image_generator_settings":{"template":"highway","default_image_id":0,"font":"","enabled":false},"version":2},"jetpack_post_was_ever_published":false},"categories":[28,9],"tags":[],"class_list":["post-10918","post","type-post","status-publish","format-standard","hentry","category-ai","category-cool"],"jetpack_publicize_connections":[],"jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"jetpack_shortlink":"https:\/\/wp.me\/p4WECr-2Q6","jetpack-related-posts":[],"_links":{"self":[{"href":"https:\/\/mattfife.com\/index.php?rest_route=\/wp\/v2\/posts\/10918","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/mattfife.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/mattfife.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/mattfife.com\/index.php?rest_route=\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/mattfife.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=10918"}],"version-history":[{"count":3,"href":"https:\/\/mattfife.com\/index.php?rest_route=\/wp\/v2\/posts\/10918\/revisions"}],"predecessor-version":[{"id":10934,"href":"https:\/\/mattfife.com\/index.php?rest_route=\/wp\/v2\/posts\/10918\/revisions\/10934"}],"wp:attachment":[{"href":"https:\/\/mattfife.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=10918"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/mattfife.com\/index.php
?rest_route=%2Fwp%2Fv2%2Fcategories&post=10918"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/mattfife.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=10918"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}