#######################################
# Filter that reduces raw wvHtml output to a bare HTML fragment.
# Reads the wvHtml page on stdin and writes the cleaned fragment to stdout.
# Steps, in order:
#   1. per-line substitutions: strip html/head/body/title/DOCTYPE/META/div
#      wrappers, comments, <hr>, <address>, links, src=/align= attributes and
#      the "Document created with" footer; drop attributes from <p ...>;
#      rewrite <i>/<b> as <em>/<strong>;
#   2. join every 4 consecutive lines into one (two pairwise-join passes);
#   3. drop the first two joined lines, remove empty <p></p> pairs and
#      delete lines that are empty or contain only spaces.
# The patterns are greedy and line-based on purpose: they are kept exactly
# as the original pipeline had them so the output stays the same.
#######################################
clean_wv_html() {
  # All per-line substitutions run in a single sed process; they are applied
  # in the listed order, exactly like the former chain of piped seds.
  sed \
    -e 's|^.*html>||g' \
    -e 's|^.*head>||g' \
    -e 's|<META.*$||g' \
    -e 's|<body.*||g' \
    -e 's|^.*/body>||g' \
    -e 's|^.*title>||g' \
    -e 's|^.*DOCTYPE.*$||g' \
    -e 's|<div.*>||g' \
    -e 's|<p>||g' \
    -e 's|<p .*>|<p>|g' \
    -e 's|</div>||g' \
    -e 's|<i>|<em>|g' \
    -e 's|</i>|</em>|g' \
    -e 's|<b>|<strong>|g' \
    -e 's|</b>|</strong>|g' \
    -e 's|<!--.*||g' \
    -e 's|-->||g' \
    -e 's|<hr>||g' \
    -e 's|<address>||g' \
    -e 's|</address>||g' \
    -e 's|<a href.*||g' \
    -e 's|src=.*||g' \
    -e 's|align=.*||g' \
    -e 's|^Document created with.*||g' \
    | sed '$!N;s/\n//' \
    | sed '$!N;s/\n//' \
    | sed -e '1,2d' -e 's|<p></p>||g' -e '/^ *$/d'
  # The two join passes must stay separate processes: each one pairs up the
  # lines produced by the previous pass. '/^ *$/d' also covers fully empty
  # lines, so the former '/^$/d' and '/./!d' stages are subsumed by it.
}

echo "Convertir un document word en html propre"

# Iterate over the glob instead of parsing `ls`, so file names containing
# backslashes or leading/trailing blanks reach wvHtml unchanged.
# NOTE(review): every document is converted into the same tmp.html, so with
# several .doc files only the last one survives -- confirm this is intended.
for doc in *.doc; do
  # An unmatched glob stays as the literal "*.doc"; -f skips that case and
  # also skips directories whose name happens to end in .doc.
  [ -f "$doc" ] || continue

  if ! wvHtml "$doc" tmp.html; then
    # Do not re-filter a stale tmp.html left over from a previous document.
    printf 'wvHtml failed for %s, skipping\n' "$doc" >&2
    continue
  fi

  # Write to a scratch file first (the filter cannot read and write the same
  # file), then replace tmp.html only if the filter succeeded.
  clean_wv_html < tmp.html > tmp.html.clean && mv -- tmp.html.clean tmp.html
done

# Open the result in the editor.
geany tmp.html
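#! /usr/bin/bash
# [[word2html]] Script to automatically convert Word .doc files to text files
# (the conversion below calls wvText and writes <name>.doc.txt next to each
# input file; despite the script name, no HTML is produced by this part).
# Usage:
#   Run it from the top of the directory tree to convert; every *.doc file
#   found under the current directory is processed.
# Required: wv (http://wvware.sourceforge.net/)
#   apt-get install wv
#
# Authors:
# FR, radeff@akademia.ch
# History
# 10 May 2007, 20:25:45: FR, created
#########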
# Batch conversion: run wvText on every *.doc file found below the current
# directory, writing the result to "<file>.txt" next to the input.
# NOTE(review): the banner says "html files" although wvText produces text;
# the message is left untouched here -- confirm before rewording it.
echo "Script to automatically convert word doc files to html files"
echo "************"
# Capture the real working directory; a bare `WD=pwd` would only store the
# literal word "pwd" and the banner would print ".pwd".
WD=$(pwd)
echo "Now converting all files under" "$WD"
echo "************"
# j counts the files handed to wvText.
declare -i j
j=0
# find emits NUL-terminated paths and read consumes them verbatim, so names
# containing blanks, backslashes or newlines are handled correctly.
# Process substitution (instead of `find | while`) keeps the loop in the
# current shell, so j still holds the count after the loop.
# -type f avoids passing directories named *.doc to wvText.
while IFS= read -r -d '' i; do
  j=$((j + 1))
  echo "$j Converting wvText $i TO $i.txt"
  if ! wvText "$i" "$i.txt"; then
    # Report the failure but keep going with the remaining files.
    printf 'wvText failed for %s\n' "$i" >&2
  fi
done < <(find . -type f -name "*.doc" -print0)
echo "************"
#echo "Finished, $j files converted"
echo "Finished, all files converted"