Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
0% found this document useful (0 votes)
147 views

HTML Parser PHP Tutorial

This document contains PHP code to parse HTML tables and convert them into a database table format. It includes a HtmlParser class that parses HTML tags and elements. The htmlTabletoDb class contains a method to parse an HTML table passed to it and return an array of the table data, with each row represented as an array. Additional files include an HTML page to input a table and submit it, and a second page to map the table columns to database fields.

Uploaded by

zeroxcool4968
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
147 views

HTML Parser PHP Tutorial

This document contains PHP code to parse HTML tables and convert them into a database table format. It includes a HtmlParser class that parses HTML tags and elements. The htmlTabletoDb class contains a method to parse an HTML table passed to it and return an array of the table data, with each row represented as an array. Additional files include an HTML page to input a table and submit it, and a second page to map the table columns to database fields.

Uploaded by

zeroxcool4968
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 13

htmltabletodb.class.php
<?php
require('htmlparser.inc');
/**
 * Converts the markup of an HTML table into a plain PHP array of rows.
 */
class htmlTabletoDb
{
    /**
     * Parses an HTML table and returns its cell text grouped by row.
     *
     * A new row starts at every opening <tr>; a cell is completed at every
     * closing </td> and receives all text seen since the previous cell
     * boundary. Text that follows the last </td> of a row is discarded.
     * Segments that contain neither a cell nor non-blank text are skipped.
     * Only <td> cells are recognised; <th> is not treated as a cell boundary.
     *
     * Rows are collected directly instead of being serialised into a marker
     * string and split again, so "/" and "::" inside cell text are no longer
     * stripped, and the removed split() function is not needed.
     *
     * @param string $Table HTML markup containing the table.
     * @return array List of rows, each a list of cell strings. Empty when
     *               the markup contains no rows.
     */
    public function ParseTable($Table)
    {
        $data = array();
        $cells = array();   // completed cells of the row being read
        $pending = '';      // text collected since the last cell boundary

        $parser = new HtmlParser($Table);
        while ($parser->parse()) {
            $tag = strtolower($parser->iNodeName);
            $isEndTag = ($parser->iNodeType == NODE_TYPE_ENDELEMENT);

            if ($tag == 'tr' && !$isEndTag) {
                // Opening row: flush whatever was collected before it.
                if (count($cells) > 0 || trim($pending) !== '') {
                    $data[] = $cells;
                }
                $cells = array();
                $pending = '';
            }

            if ($tag == 'td' && $isEndTag) {
                // Closing cell: the collected text is this cell's value.
                $cells[] = $pending;
                $pending = '';
            }

            if ($parser->iNodeName == 'Text' && isset($parser->iNodeValue)) {
                $pending .= $parser->iNodeValue;
            }
        }

        // Flush the final row; trailing text after its last </td> is dropped.
        if (count($cells) > 0 || trim($pending) !== '') {
            $data[] = $cells;
        }

        return $data;
    }
}
?>

htmlparser.inc
<?php
/*
* Copyright (c) 2003 Jose Solorzano. All rights reserved.
* Redistribution of source must retain this copyright notice.
*
* Jose Solorzano (http://jexpert.us) is a software consultant.
*
* Contributions by:
* - Leo West (performance improvements)
*/
// Node type codes stored in HtmlParser::$iNodeType after each parse() call.
define ("NODE_TYPE_START",0); // not assigned anywhere in the code shown here
define ("NODE_TYPE_ELEMENT",1); // opening tag with a valid name, e.g. <td ...>
define ("NODE_TYPE_ENDELEMENT",2); // closing tag, e.g. </td>
define ("NODE_TYPE_TEXT",3); // text between tags, or a "<..." fragment that is not a valid tag
define ("NODE_TYPE_COMMENT",4); // <!-- ... --> comment
define ("NODE_TYPE_DONE",5); // set by readTag() when the current character is not "<", i.e. input is exhausted
/**
* Class HtmlParser.
* To use, create an instance of the class passing
* HTML text. Then invoke parse() until it's false.
* When parse() returns true, $iNodeType, $iNodeName
* $iNodeValue and $iNodeAttributes are updated.
*
* To create an HtmlParser instance you may also
* use convenience functions HtmlParser_ForFile
* and HtmlParser_ForURL.
*/
class HtmlParser {
    /**
     * Field iNodeType.
     * May be one of the NODE_TYPE_* constants above.
     */
    public $iNodeType;

    /**
     * Field iNodeName.
     * For elements, it's the name of the element. Text nodes are named
     * "Text" and comments "Comment".
     */
    public $iNodeName = "";

    /**
     * Field iNodeValue.
     * For text nodes, it's the text. For comments, the full comment markup.
     */
    public $iNodeValue = "";

    /**
     * Field iNodeAttributes.
     * A string-indexed array containing attribute values
     * of the current node. Indexes are always lowercase.
     */
    public $iNodeAttributes;

    // The following fields should be
    // considered private:

    public $iHtmlText;
    public $iHtmlTextLength;
    public $iHtmlTextIndex = 0;
    // Declared by the original code but never read or written; kept so
    // that existing code touching the property keeps working.
    public $iHtmlCurrentChar;
    // Character at $iHtmlTextIndex, or integer -1 once the index is past
    // the end of the text. This is the property the parser actually uses.
    public $iCurrentChar;
    public $BOE_ARRAY;
    public $B_ARRAY;
    public $BOS_ARRAY;

    /**
     * Constructor.
     * Constructs an HtmlParser instance with
     * the HTML text given.
     *
     * Uses __construct() because a method named after the class is no
     * longer treated as a constructor from PHP 8 on.
     *
     * @param string $aHtmlText HTML text to parse.
     */
    public function __construct ($aHtmlText) {
        $this->iHtmlText = (string) $aHtmlText;
        $this->iHtmlTextLength = strlen($this->iHtmlText);
        $this->iNodeAttributes = array();
        $this->BOE_ARRAY = array (" ", "\t", "\r", "\n", "=" );
        $this->B_ARRAY = array (" ", "\t", "\r", "\n" );
        $this->BOS_ARRAY = array (" ", "\t", "\r", "\n", "/" );
        $this->setTextIndex (0);
    }

    /**
     * Method parse.
     * Parses the next node. Returns false only if
     * the end of the HTML text has been reached.
     * Updates values of iNode* fields.
     *
     * @return bool
     */
    public function parse() {
        $text = $this->skipToElement();
        if ($text !== "") {
            $this->iNodeType = NODE_TYPE_TEXT;
            $this->iNodeName = "Text";
            $this->iNodeValue = $text;
            return true;
        }
        return $this->readTag();
    }

    /** Resets the attribute map of the current node. */
    public function clearAttributes() {
        $this->iNodeAttributes = array();
    }

    /**
     * Reads the tag, comment or stray "<" starting at the current position
     * and updates the iNode* fields accordingly.
     *
     * @return bool false when the current character is not "<" (end of
     *              input), true otherwise.
     */
    public function readTag() {
        if ($this->iCurrentChar !== "<") {
            $this->iNodeType = NODE_TYPE_DONE;
            return false;
        }
        $this->clearAttributes();
        $this->skipMaxInTag ("<", 1);

        // Closing tag: </name ...>
        if ($this->iCurrentChar === "/") {
            $this->moveNext();
            $name = $this->skipToBlanksInTag();
            $this->iNodeType = NODE_TYPE_ENDELEMENT;
            $this->iNodeName = $name;
            $this->iNodeValue = "";
            $this->skipEndOfTag();
            return true;
        }

        $name = $this->skipToBlanksOrSlashInTag();
        if (!$this->isValidTagIdentifier ($name)) {
            if (strpos($name, "!--") === 0) {
                if (strpos($name, "--", 3) === (strlen($name) - 2)) {
                    // The comment contained no blank or slash, so $name
                    // already holds all of it up to the closing ">".
                    $this->iNodeType = NODE_TYPE_COMMENT;
                    $this->iNodeName = "Comment";
                    $this->iNodeValue = "<" . $name . ">";
                    $this->skipEndOfTag();
                    return true;
                }
                $rest = $this->skipToStringInTag ("-->");
                if ($rest !== "") {
                    $this->iNodeType = NODE_TYPE_COMMENT;
                    $this->iNodeName = "Comment";
                    $this->iNodeValue = "<" . $name . $rest;
                    // skipToStringInTag() already moved past the "-->".
                    return true;
                }
            }
            // Neither a valid tag name nor a terminated comment: hand the
            // fragment back as literal text and leave the index where it
            // is, so the remainder is returned by the next parse() call.
            $this->iNodeType = NODE_TYPE_TEXT;
            $this->iNodeName = "Text";
            $this->iNodeValue = "<" . $name;
            return true;
        }

        // Opening tag: collect attributes until ">" or end of input.
        $this->iNodeType = NODE_TYPE_ELEMENT;
        $this->iNodeValue = "";
        $this->iNodeName = $name;
        while ($this->skipBlanksInTag()) {
            $attrName = $this->skipToBlanksOrEqualsInTag();
            if ($attrName !== "" && $attrName !== "/") {
                $this->skipBlanksInTag();
                if ($this->iCurrentChar === "=") {
                    $this->skipEqualsInTag();
                    $this->skipBlanksInTag();
                    $value = $this->readValueInTag();
                    $this->iNodeAttributes[strtolower($attrName)] = $value;
                }
                else {
                    // Attribute without a value, e.g. <input checked>.
                    $this->iNodeAttributes[strtolower($attrName)] = "";
                }
            }
        }
        $this->skipEndOfTag();
        return true;
    }

    /**
     * Tells whether $name consists only of letters, digits, "_" and "-".
     * Uses preg_match() because ereg() was removed in PHP 7.
     *
     * @param string $name
     * @return bool
     */
    public function isValidTagIdentifier ($name) {
        return preg_match ('/^[A-Za-z0-9_\-]+$/D', $name) === 1;
    }

    /** Skips blanks inside a tag; returns true if at least one was skipped. */
    public function skipBlanksInTag() {
        return "" !== ($this->skipInTag ($this->B_ARRAY));
    }

    /** Reads up to the next blank, "=" or ">". */
    public function skipToBlanksOrEqualsInTag() {
        return $this->skipToInTag ($this->BOE_ARRAY);
    }

    /** Reads up to the next blank or ">". */
    public function skipToBlanksInTag() {
        return $this->skipToInTag ($this->B_ARRAY);
    }

    /** Reads up to the next blank, "/" or ">". */
    public function skipToBlanksOrSlashInTag() {
        return $this->skipToInTag ($this->BOS_ARRAY);
    }

    /** Consumes a single "=" if it is the current character. */
    public function skipEqualsInTag() {
        return $this->skipMaxInTag ("=", 1);
    }

    /**
     * Reads an attribute value: the text between matching double or single
     * quotes, or an unquoted run up to the next blank or ">".
     *
     * @return string
     */
    public function readValueInTag() {
        $ch = $this->iCurrentChar;
        if ($ch === "\"" || $ch === "'") {
            $this->skipMaxInTag ($ch, 1);
            $value = $this->skipToInTag ($ch);
            $this->skipMaxInTag ($ch, 1);
            return $value;
        }
        return $this->skipToBlanksInTag();
    }

    /**
     * Moves to $index and refreshes iCurrentChar (-1 past the end).
     *
     * @param int $index
     */
    public function setTextIndex ($index) {
        $this->iHtmlTextIndex = $index;
        if ($index >= $this->iHtmlTextLength) {
            $this->iCurrentChar = -1;
        }
        else {
            // Square brackets: the $str{$i} form was removed in PHP 8.
            $this->iCurrentChar = $this->iHtmlText[$index];
        }
    }

    /**
     * Advances by one character.
     *
     * @return bool false when already at the end of the text.
     */
    public function moveNext() {
        if ($this->iHtmlTextIndex < $this->iHtmlTextLength) {
            $this->setTextIndex ($this->iHtmlTextIndex + 1);
            return true;
        }
        return false;
    }

    /** Moves just past the next ">" (or to the end of the text). */
    public function skipEndOfTag() {
        while (($ch = $this->iCurrentChar) !== -1) {
            $this->moveNext();
            if ($ch === ">") {
                return;
            }
        }
    }

    /**
     * Consumes consecutive characters that belong to $chars, stopping at
     * ">" or at the first character outside the set.
     *
     * @param array|string $chars Accepted characters.
     * @return string The consumed characters.
     */
    public function skipInTag ($chars) {
        $accepted = $this->toCharList ($chars);
        $sb = "";
        while (($ch = $this->iCurrentChar) !== -1) {
            if ($ch === ">" || !in_array ($ch, $accepted, true)) {
                return $sb;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Same as skipInTag() but consumes at most $maxChars characters.
     *
     * @param array|string $chars Accepted characters.
     * @param int $maxChars Upper bound on consumed characters.
     * @return string The consumed characters.
     */
    public function skipMaxInTag ($chars, $maxChars) {
        $accepted = $this->toCharList ($chars);
        $sb = "";
        $count = 0;
        while (($ch = $this->iCurrentChar) !== -1 && $count++ < $maxChars) {
            if ($ch === ">" || !in_array ($ch, $accepted, true)) {
                return $sb;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Consumes characters until ">" or one of $chars is reached; the
     * terminating character itself is not consumed.
     *
     * @param array|string $chars Terminating characters.
     * @return string The consumed characters.
     */
    public function skipToInTag ($chars) {
        $stops = $this->toCharList ($chars);
        $sb = "";
        while (($ch = $this->iCurrentChar) !== -1) {
            if ($ch === ">" || in_array ($ch, $stops, true)) {
                return $sb;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /** Consumes and returns all text up to the next "<" or end of input. */
    public function skipToElement() {
        $sb = "";
        while (($ch = $this->iCurrentChar) !== -1) {
            if ($ch === "<") {
                return $sb;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Returns text between current position and $needle,
     * inclusive, or "" if not found. The current index is moved to a point
     * after the location of $needle, or not moved at all
     * if nothing is found.
     */
    public function skipToStringInTag ($needle) {
        $pos = strpos ($this->iHtmlText, $needle, $this->iHtmlTextIndex);
        if ($pos === false) {
            return "";
        }
        $top = $pos + strlen($needle);
        $retvalue = substr ($this->iHtmlText, $this->iHtmlTextIndex, $top - $this->iHtmlTextIndex);
        $this->setTextIndex ($top);
        return $retvalue;
    }

    /**
     * Normalises a character set given either as an array of one-character
     * strings or as a plain string. Several callers pass a bare string such
     * as "<" or "\""; calling count() on a string is an error on PHP 8.
     *
     * @param array|string $chars
     * @return array List of one-character strings.
     */
    private function toCharList ($chars) {
        if (is_array ($chars)) {
            return $chars;
        }
        return str_split ((string) $chars);
    }
}
/**
 * Convenience factory: builds an HtmlParser for the contents of a file.
 * The file name is handed unchanged to HtmlParser_ForURL().
 */
function HtmlParser_ForFile ($fileName) {
    $parser = HtmlParser_ForURL($fileName);
    return $parser;
}
/**
 * Convenience factory: reads everything available from $url (any path or
 * URL that fopen() accepts) and returns an HtmlParser for that content.
 *
 * When the source cannot be opened, fopen() raises its usual warning and a
 * parser over an empty document is returned. The failed handle is no longer
 * passed on to fread()/fclose(), which is a TypeError on PHP 8.
 *
 * @param string $url File path or URL to read.
 * @return HtmlParser
 */
function HtmlParser_ForURL ($url) {
    $fp = fopen ($url, "r");
    if ($fp === false) {
        return new HtmlParser ("");
    }
    $content = "";
    while (true) {
        $data = fread ($fp, 8192);
        // fread() yields false on error and "" once nothing is left.
        if ($data === false || strlen($data) == 0) {
            break;
        }
        $content .= $data;
    }
    fclose ($fp);
    return new HtmlParser ($content);
}
?>

index.php
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>HTML <Table> To Database</title>
<script language="javascript">
/**
 * Fills the "htmltable" field of the given form with a sample HTML table
 * (one header row followed by four data rows, separated by "\r").
 *
 * The literal is assembled from one piece per table row; a string literal
 * may not continue across physical lines.
 */
function insertsample(form)
{
    form.htmltable.value =
        "<TABLE>\r"
        + "<TR><TD>S.No.</TD><TD>Name</TD><TD>Age</TD><TD>Sex</TD><TD>Location</TD></TR>\r"
        + "<TR><TD>1</TD><TD>Azeem</TD><TD>24</TD><TD>Male</TD><TD>Pakistan</TD></TR>\r"
        + "<TR><TD>2</TD><TD>Khurram</TD><TD>24</TD><TD>Male</TD><TD>Pakistan</TD></TR>\r"
        + "<TR><TD>3</TD><TD>Mushhad</TD><TD>24</TD><TD>Male</TD><TD>Pakistan</TD></TR>\r"
        + "<TR><TD>3</TD><TD>Qamar</TD><TD>24</TD><TD>Male</TD><TD>Pakistan</TD></TR>\r"
        + "</TABLE>";
}
/**
 * Submit guard for the table-entry form: refuses to submit while the
 * "htmltable" field is empty.
 *
 * @param form the form to check. The onSubmit handler passes "this.form",
 *             which is undefined on a <form> element, so the form is
 *             looked up by name whenever the argument is unusable.
 * @return false when the field is empty (after alerting), true otherwise.
 */
function validate(form)
{
    var target = (form && form.htmltable) ? form : document.forms["frmhtmltabledb"];
    if (target.htmltable.value == '') {
        alert('Please enter a HTML Table or Click "Fill Sample Text"');
        return false;
    }
    return true;
}
</script>
</head>
<body>
<form name="frmhtmltabledb" action="mapcolumns.php" method="post" onSubmit="return validate(this.form);">
<table cellpadding="3" cellspacing="0" align="center" width="75%" bgcolor="#CCCCCC">
<tr>
<td colspan="2">HTML &lt;Table&gt; Here</td>
</tr>
<tr>
<td><textarea cols="75" rows="25" name="htmltable"></textarea></td>
<td valign="top"><input type="submit" value="Next >>"><br><input type="button" value="Fill Sample Text"
onClick="javascript:insertsample(this.form);"></td>
</tr>
</table>
</form>
</body>
</html>

mapcolumns.php
<?php
require('htmltabletodb.class.php');
?>
<html>
<head>
<title>HTML &lt;Table&gt; To Database</title>
<script language="javascript">
/**
 * Keeps a column's text field in step with its "Insert" checkbox.
 * The text field is the form element at index (checkbox.id - 1): an
 * unticked box disables and clears it, a ticked box enables it again.
 */
function includeColumn(form,checkbox)
{
    var field = form.elements[checkbox.id-1];
    if (checkbox.checked == false) {
        field.disabled = true;
        field.value = '';
        return;
    }
    field.disabled = false;
}
/**
 * Fills the mapping form with sample values: the table name "user_info"
 * and up to five sample column names.
 *
 * Column text fields sit at the odd element indices 1, 3, 5, ... of the
 * form. A position is only written when it actually holds a "column[]"
 * field, so a table with fewer than five columns no longer has its submit
 * button or hidden fields overwritten.
 */
function insertsample(form)
{
    var sampleNames = ["user_id", "user_name", "user_age", "user_sex", "user_location"];
    var i, field;
    form.dbTable.value = 'user_info';
    for (i = 0; i < sampleNames.length; i++) {
        field = form.elements[1 + 2 * i];
        if (field && field.name == 'column[]') {
            field.value = sampleNames[i];
        }
    }
}
/**
 * Submit guard for the column-mapping form "frmhtmltabletodb".
 *
 * Requires a database table name and at least one non-empty column name.
 * Column name fields are the odd-indexed form elements before the last
 * four elements of the form. On failure the user is alerted and the
 * offending field receives the focus.
 *
 * @return true when the form may be submitted, false otherwise.
 */
function validate()
{
    var form = document.forms["frmhtmltabletodb"];
    var isFilled = false;
    var i;
    for (i = 1; i < (form.elements.length - 4); i += 2)
    {
        if (form.elements[i].value != '') {
            isFilled = true;
            break;
        }
    }
    if (form.dbTable.value == '') {
        alert('Please Enter Table Name');
        // focus() is the DOM method; "setfocus" was a no-op property read.
        form.dbTable.focus();
        return false;
    }
    if (!isFilled) {
        alert('Please Enter atleast one Database Table Field');
        form.elements[1].focus();
        return false;
    }
    return true;
}
</script>
</head>
<body>
<?php
// Work out the column headings from the first row of the submitted table.
$objClass = new htmlTabletoDb();
// The field is missing when this page is opened directly.
$html = isset($_POST["htmltable"]) ? (string) $_POST["htmltable"] : '';
$totalColumns = 0;

// Keep everything up to and including the first "</tr>" (any letter case);
// if there is none, hand the whole input to the parser.
$start = stripos($html, '</tr');
$firstRow = ($start === false) ? $html : substr($html, 0, $start + 5);

// The headings were lower-cased by the original code and still are.
// The inline styling that used to be injected here has been dropped:
// ParseTable() only reads tag names and text, and the injected "<table"
// tag had no closing ">".
$columns = strtolower($firstRow) . "</table>";

$arr_columns = $objClass->ParseTable($columns);
if (!is_array($arr_columns)) {
    // Nothing could be parsed; keep the rendering loop below safe.
    $arr_columns = array();
}
?>
<form name="frmhtmltabletodb" action="parse.php" method="post" onSubmit="return validate();">
<table cellpadding="0" cellspacing="0" width="75%" align="center" bgcolor="#CCCCCC">
<tr>
<td>Database Table Name</td>
</tr>
<tr>
<td><input type="text" name="dbTable" size="40"></td>
</tr>
</table><br>
<?php
// Render one mapping grid per parsed row: a line for every HTML column
// with a text field for the database column name and an "Insert" checkbox.
if (!isset($arr_columns) || !is_array($arr_columns)) {
    $arr_columns = array();
}
foreach ($arr_columns as $rowCells)
{
    echo "<table cellspacing=0 width='75%' align='center' bgcolor=\"#CCCCCC\">"
        . "<tr><td colspan='3'>Column(s) Name</td></tr>"
        . "<tr><td width=\"80%\">"
        . "<table cellspacing=0 cellpadding='3' width='100%' align='center'>"
        . "<tr Style='background-color:#999999;'><td>#</td><td>HTML Table Column Name</td>"
        . "<td>Database Table Column Name</td><td>Insert</td></tr>";
    foreach ($rowCells as $innerkey => $innervalue)
    {
        // The heading comes from user-submitted markup, so it is escaped
        // before being echoed. Entities already present in the text are
        // left alone (double_encode = false) because the parser does not
        // decode them.
        $label = htmlspecialchars((string) $innervalue, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
        // The checkbox id is the form index of the checkbox itself;
        // includeColumn() uses id-1 to reach the text field before it.
        echo "<tr><td width=\"5%\" Style=\"border:1px solid #000;\">" . ($innerkey + 1) . "</td>"
            . "<td Style=\"border:1px solid #000;\" >" . $label . "</td>"
            . "<td Style=\"border:1px solid #000;\" align=\"center\" width=\"20%\">"
            . "<input type=\"text\" Style=\"border:1px solid #000;\" size=20 name=\"column[]\"></td>"
            . "<td Style=\"border:1px solid #000;\" align=\"center\" width=\"10%\">"
            . "<input type=\"checkbox\" id=\"" . ($totalColumns + 2) . "\" name=\"chk" . $totalColumns . "\""
            . " onClick=\"javascript:includeColumn(this.form,this);\" CHECKED></td></tr>";
        $totalColumns += 2;
    }
    echo "</table></td><td valign=\"top\" width=\"20%\"><input type=\"submit\" value=\"Next >>\"><br>"
        . "<input type=\"button\" value=\"Fill Sample Columns\" onClick=\"javascript:insertsample(this.form);\"><br><br>"
        . "<input type=\"hidden\" name=\"execute\"></td></tr></table>";
}
?>
<BR>
<table cellpadding="0" cellspacing="0" width="75%" align="center" bgcolor="#CCCCCC">
<tr>
<td>Original Data</td>
</tr>
<tr>
<td align="center"><textarea cols="80" rows="25" name="htmltable"><?php /* Escaped so that markup such as "</textarea>" in the submitted table cannot break out of the field; the browser decodes it again on submit. */ echo htmlspecialchars(isset($html) ? (string) $html : '', ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); ?></textarea></td>
</tr>
</table>
</form>
</body>
</html>

parse.php
<?php
require('htmltabletodb.class.php');
?>
<html>
<head>
<title>HTML &lt;Table&gt; To Database</title>
<script language="javascript">
/**
 * Focuses a form field and selects its content.
 *
 * @param theField dotted path of the field below "document",
 *                 e.g. "test.select2" for document.test.select2.
 *
 * The path is resolved one property at a time instead of through eval().
 * Nothing happens when the path does not lead to an element.
 */
function selectAll(theField) {
    var parts = String(theField).split(".");
    var target = document;
    var i;
    for (i = 0; i < parts.length; i++) {
        if (target == null) {
            return;
        }
        target = target[parts[i]];
    }
    if (!target) {
        return;
    }
    target.focus();
    target.select();
}
/**
 * Copies the given text to the system clipboard, using whichever
 * mechanism the browser offers:
 *   1. the asynchronous Clipboard API (navigator.clipboard),
 *   2. window.clipboardData (the Internet Explorer way),
 *   3. the privileged XPCOM clipboard interface of old Mozilla/Netscape.
 *
 * @param meintext the text to copy.
 * @return always false.
 */
function copy_clip(meintext)
{
    if (typeof navigator !== "undefined" && navigator.clipboard
            && typeof navigator.clipboard.writeText === "function")
    {
        // Best effort: a rejected write (e.g. permission denied) is ignored,
        // just as the legacy branches below give no feedback on failure.
        navigator.clipboard.writeText(String(meintext))["catch"](function () {});
        return false;
    }

    if (window.clipboardData)
    {
        // the IE way
        window.clipboardData.setData("Text", meintext);
    }
    else if (window.netscape)
    {
        // Probably not the best way to detect Moz/NS; it is unknown from
        // which version on this works exactly.
        // Important, but not clearly documented anywhere:
        // you have to sign the code to enable this, or see notes below
        netscape.security.PrivilegeManager.enablePrivilege('UniversalXPConnect');

        // create an interface to the clipboard
        var clip = Components.classes['@mozilla.org/widget/clipboard;1'].createInstance(Components.interfaces.nsIClipboard);
        if (!clip) return false;

        // create a transferable
        var trans = Components.classes['@mozilla.org/widget/transferable;1'].createInstance(Components.interfaces.nsITransferable);
        if (!trans) return false;

        // specify what kind of data is being transferred; text in this case
        trans.addDataFlavor('text/unicode');

        // the transferable needs the text wrapped in a supports-string
        var str = Components.classes["@mozilla.org/supports-string;1"].createInstance(Components.interfaces.nsISupportsString);
        var copytext = meintext;
        str.data = copytext;
        trans.setTransferData("text/unicode", str, copytext.length * 2);

        var clipid = Components.interfaces.nsIClipboard;
        clip.setData(trans, null, clipid.kGlobalClipboard);
    }
    return false;
}
</script>
</head>
<body>
<?php
// Build one INSERT statement per table row from the posted table markup
// and the column mapping chosen on the previous page. The statements are
// only displayed for copying; nothing is executed here.
$objClass = new htmlTabletoDb();

// Any of these fields is missing when the page is opened directly.
$html = isset($_POST["htmltable"]) ? (string) $_POST["htmltable"] : '';
$tableName = isset($_POST["dbTable"]) ? (string) $_POST["dbTable"] : '';
$arr_columnsName = (isset($_POST["column"]) && is_array($_POST["column"])) ? $_POST["column"] : array();
$totalRows = 0;
$totalColumns = 0;
$columnsName = '';
$columnsData = '';
// Initialised up front; it was previously first touched with ".=".
$sql = '';

// Comma-separated list of the database columns that were given a name.
$namedColumns = array();
foreach ($arr_columnsName as $cValue)
{
    if ($cValue != '') {
        $namedColumns[] = $cValue;
    }
}
$columnsName = implode(',', $namedColumns);

$arr_data = $objClass->ParseTable($html);
if (!is_array($arr_data)) {
    $arr_data = array();
}

foreach ($arr_data as $rowCells)
{
    $rowValues = array();
    foreach ($rowCells as $innerKey => $innerValue)
    {
        // Only cells whose position was mapped to a column name are used;
        // a row may hold more cells than there are mapped columns.
        if (isset($arr_columnsName[$innerKey]) && $arr_columnsName[$innerKey] != '') {
            // Double embedded single quotes (standard SQL) so a value such
            // as O'Brien does not end the literal early. Anything that
            // executes SQL directly should use prepared statements instead.
            $rowValues[] = "'" . str_replace("'", "''", trim($innerValue)) . "'";
        }
    }
    $columnsData = implode(',', $rowValues);
    $sql .= "\n\rINSERT INTO ".$tableName."(".$columnsName.") \nVALUES(".$columnsData.");";
    $columnsData = '';
}
?>
<form name="test">
<table cellpadding="0" cellspacing="0" width="75%" align="center" >
<tr>
<td>SQL Quries</td>
<td align="right"><a href="index.php">Home</a> &nbsp;&nbsp;::&nbsp;&nbsp;<a
href="javascript:selectAll('test.select2')">Select All</a>&nbsp;&nbsp;::&nbsp;&nbsp;<input type="button" value="Copy"
onClick="return copy_clip(test.select2.value);"></td>
</tr>
<tr>
<td colspan="2"><textarea cols="100" rows="25" name="select2" ><?php /* Escaped because the statements contain user-submitted text; $sql is unset when no rows were parsed. */ echo htmlspecialchars(isset($sql) ? (string) $sql : '', ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); ?></textarea></td>
</tr>
</table>
</form>
</body>
</html>

You might also like