HTML parsing to remove MS Word formatting from an XML-RPC request

Try using CTRL+SHIFT+V when pasting content into the editing window of a post/page. That should strip out the formatting.

As for stripping the attribute after the fact, try this:

// Example call: pass the markup held in $content through remove_html_attribute()
// to drop its 'style' attributes, and store the cleaned markup back in $content.
$content = remove_html_attribute('style', $content);
/**
 * Remove an attribute from selected tags of an HTML fragment.
 *
 * The fragment is parsed with DOMDocument, the attribute is removed from every
 * element whose tag name is listed in $taglist, and the fragment is serialised
 * again without the doctype/html/head/body wrappers added by the parser.
 * Input that contains no tags at all is returned unchanged.
 *
 * @param string $attr    the attribute to remove (e.g. 'style')
 * @param string $input   the html fragment
 * @param array  $taglist tag names to remove the attribute from; defaults to array('span')
 * @return string the html with the attribute removed; '' when the input is empty
 */
function remove_html_attribute($attr, $input, $taglist = array('span')){
    // empty() also reports the string "0" as empty, but "0" is real content
    if(empty($input) && $input !== '0'){
        return '';
    }

    // nothing to parse when the input contains no tags
    if($input == strip_tags($input)){
        return $input;
    }

    $dom = new DOMDocument();

    // Word markup (<o:p> and friends) is not valid for the HTML parser; collect
    // libxml's parse errors internally instead of surfacing them as PHP warnings
    $previous = libxml_use_internal_errors(true);

    // encode non-ASCII characters as numeric entities so UTF-8 text is not
    // garbled by the parser (replaces the 'HTML-ENTITIES' conversion, which is
    // deprecated as of PHP 8.2)
    $loaded = $dom->loadHTML(
        mb_encode_numericentity($input, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8')
    );

    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $domElement = $dom->documentElement;
    if(!$loaded || $domElement === null){
        // the markup could not be parsed; hand the input back untouched
        return $input;
    }

    foreach((array) $taglist as $target_tag){
        foreach($domElement->getElementsByTagName($target_tag) as $tag){
            $tag->removeAttribute($attr);
        }
    }

    // The parser wraps the fragment in <html><body> and may move elements such
    // as <style>/<meta>/<title> into a <head>. Serialise the contents of every
    // wrapper, so the body is not lost when a <head> happens to come first.
    $result = '';
    foreach($domElement->childNodes as $wrapper){
        if($wrapper->hasChildNodes()){
            $result .= innerHTML($wrapper); //strip doctype/html/head/body tags
        }
    }

    return $result;
}


/**
 * Serialise the children of a DOM node to an HTML string.
 *
 * Every child of $node is deep-copied into a fresh, empty DOMDocument and that
 * document is saved as HTML, so the result holds the markup inside $node but
 * not $node's own tag. Passing a <body> element therefore yields the fragment
 * without the doctype/html/body tags.
 *
 * @param DOMNode|null $node the node whose contents should be serialised
 * @return string the HTML of the child nodes; '' when $node is not a DOM node
 */
function innerHTML($node){
  // callers may pass e.g. $element->firstChild, which is null for an empty element
  if(!($node instanceof DOMNode)){
    return '';
  }

  $doc = new DOMDocument();
  foreach ($node->childNodes as $child){
    // importNode(..., true) creates a deep copy owned by $doc; the source tree is left untouched
    $doc->appendChild($doc->importNode($child, true));
  }

  return $doc->saveHTML();
}

However, this removes only the attribute: the <span> tags themselves, now without that attribute, are still left behind in the content.