User Tools

Site Tools


mysql2pgsql

На главную

Конвертация данных из MySQL в PostgreSQL

Сильно ковырятый скрипт из проекта mysql2pgsql. Хотел им патч послать, а у них там весь мэйллист в иероглифах..

Фичи

  • Конвертация FOREIGN KEY'ев
  • Конвертация индексов
  • Конвертация BLOB'ов
  • Вставка данных в таблицу единой транзакцией

Известные недостатки

  • Поддержка только одного авто-инкрементного поля на таблицу
  • Довольно медленная вставка в PostgreSQL. В идеале INSERT'ы нужно заменить командой COPY

Использование

Скрипт принимает в STDIN дамп из MySQL'я, выводит в STDOUT дамп для PostgreSQL. База в PostgreSQL должна быть создана заранее и в ней не должно быть таблиц, которые есть в дампе MySQL'я.

Команда для экспорта из MySQL'я

  mysqldump --user=USER_NAME --extended-insert=0 --add-drop-table=FALSE --add-locks=FALSE --hex-blob --password=PASSWORD DATABASE_NAME > pg_dump.sql

Команда для импорта в postgres:

  cat pg_dump.sql | perl mysql2pgsql.pl | psql -U USER_NAME -W DATABASE_NAME

В принципе можно запустить прямо пайпом из mysql'я в postgres:

  mysqldump --user=USER_NAME --extended-insert=0 --add-drop-table=FALSE --add-locks=FALSE --hex-blob --password=PASSWORD DATABASE_NAME | perl mysql2pgsql.pl | psql -U USER_NAME -W DATABASE_NAME
#!/usr/bin/perl -w
use Data::Dumper;
use Getopt::Long;
# the next two work with SET datatype
use List::Permutor;   # included with this
use List::PowerSet qw(powerset); # included with this
 
use strict;
 
# command arguments
my( $DEBUG, $HELP, $SCHEMA, $opt_debug, $opt_help, $opt_schema);
# general values
my ($out, $size);
# variables for constructing pre-create-table entities
my $create_sql='';	# if empty we are not making a create statement
my %constraints=(); #  holds enumeration and sets which require extra code to create
my ( $index,$column_name, $seq);
my $table='';
my $pre_create_sql='';
my $foreign_key_create_sql = '';
my $index_create_sql='';
my $transaction = 0;
my $mysql_datatypesStr = "(TINYINT|SMALLINT|MEDIUMINT|INT|INTEGER|BIGINT|REAL|DOUBLE|FLOAT|DECIMAL|NUMERIC|DATE|TIME|TIMESTAMP|DATETIME|YEAR|CHAR|VARCHAR|BINARY|VARBINARY|TINYBLOB|BLOB|MEDIUMBLOB|LONGBLOB|TINYTEXT|TEXT|MEDIUMTEXT|LONGTEXT|ENUM|SET)";
my $table_start_time = time();
 
# create separate table to reference and to hold mysql's enum data type
# values.  All this table's creation before create table
# definition
# and create a reference to that table in the create table statement
sub convert_enum_and_set_to_constraint_table {
	my ($d, $type, @tmp, $column_valuesStr, @column_values, $max_length, $value, $setStr, $constraint_table_name);
	if ($DEBUG) {
		$d = Data::Dumper->new([\%constraints]);
		print $d->Dump;
	}
	foreach $column_name (keys %constraints) {
        $type=$constraints{$column_name}{'type'};
        $column_valuesStr = $constraints{$column_name}{'values'};
		if ($DEBUG) {
			$d = Data::Dumper->new([$constraints{$column_name}]);
			print $d->Dump;
		}
		$constraint_table_name = ${table} . "_" . ${column_name} ."_constraint_table";
		$max_length=0;
		@column_values = split /,/, $column_valuesStr;
		if ($DEBUG) {
			 print STDERR  "---- column_values = " . $#column_values . "\n";
		}
        # do constraints for a set
        # yes, i feel sorry for whoever needs to change their acceptable mysql SET values down the road
        # yes, this is
        if ($type eq "set") {
            my $ps = powerset(@column_values);  # all unique sets regardless of order, returns array refs of array refs
            while (my $power_set = pop(@$ps)) {
                my $perm = new List::Permutor @$power_set;  # get all orderings of this set
                while (my @set = $perm->next) {
                    #  print "One order is @set.\n";
                    $setStr  = join (',', @set);
                    $setStr =~ s/','/,/g;  # change   'r','d'  into 'r,d'
                    push(@tmp, $setStr);
                }
            }
            @column_values = @tmp;
        }
 
        # do constraints for an enum
        # find max length of values in enumeration
        foreach (@column_values) {
            if ($max_length < length($_)) { $max_length = length($_); }
        }
        $pre_create_sql .= qq~create table $constraint_table_name  ("${column_name}" varchar( $max_length ) primary key)\\g\n~;
        foreach $value (@column_values) {
        if ($DEBUG) {
            print STDERR "---- column_value = " . $value . "\n";
        }
            if ($value =~ m/'/) {
            $pre_create_sql .= qq~insert into $constraint_table_name   values (  $value  )\\g\n~; # ad ' for ints and varchars
            } else {
                $pre_create_sql .= qq~insert into $constraint_table_name values ('${value}')\\g\n~; # ad ' for ints and varchars
            }
        }
	}
	# now append to $pre_create_sql the " create table " clause which references $constraint_table_name
} # END sub convert_enum_and_set_to_constraint_table
 
# print create table and reset create table vars
# when moving from each "create table" to "insert" part of dump
sub print_table_def() {
    my ( @create_idx_commandsArr, $stmt, $stmts, $table_field_combination);
    &convert_enum_and_set_to_constraint_table;
	print $pre_create_sql;
	print $create_sql;
	$create_sql="";
	$pre_create_sql='';
	# empty %constraints for next " create table" statement
	%constraints=();
}
 
sub toOctets($) {
  $_ =~ /0x([A-Fa-f0-9]+)/;
  return ",'".hex2dec($1)."'";
}
 
sub hex2dec($) {
 my $srcString = shift;
 my $dstString = '';
 my $strLen = length $srcString;
 if (($strLen % 2) != 0) {
  $srcString = '0'.$srcString;
  $strLen++;
 }
 for (my $i = 0; $i < $strLen; $i+=2) {
  my $currentOctet = substr($srcString, $i, 2);
  $dstString .= '\\\\'.sprintf('%03o', hex($currentOctet));
 }
 return $dstString;
}
 
 
my %pg_keywords = (
  "ABORT" => 1, "ABS" => 1, "ABSOLUTE" => 1, "ACCESS" => 1,
  "ACTION" => 1, "ADA" => 1, "ADD" => 1, "ADMIN" => 1, "AFTER" => 1,
  "AGGREGATE" => 1, "ALIAS" => 1, "ALL" => 1, "ALLOCATE" => 1,
  "ALTER" => 1, "ANALYSE" => 1, "ANALYZE" => 1, "AND" => 1, "ANY" => 1,
  "ARE" => 1, "ARRAY" => 1, "AS" => 1, "ASC" => 1, "ASENSITIVE" => 1,
  "ASSERTION" => 1, "ASSIGNMENT" => 1, "ASYMMETRIC" => 1, "AT" => 1,
  "ATOMIC" => 1, "AUTHORIZATION" => 1, "AVG" => 1,
  "BACKWARD" => 1, "BEFORE" => 1, "BEGIN" => 1, "BETWEEN" => 1,
  "BIGINT" => 1, "BINARY" => 1, "BIT" => 1, "BITVAR" => 1,
  "BIT_LENGTH" => 1, "BLOB" => 1, "BOOLEAN" => 1, "BOTH" => 1,
  "BREADTH" => 1, "BY" => 1,
  "C" => 1, "CACHE" => 1, "CALL" => 1, "CALLED" => 1, "CARDINALITY" => 1,
  "CASCADE" => 1, "CASCADED" => 1, "CASE" => 1, "CAST" => 1,
  "CATALOG" => 1, "CATALOG_NAME" => 1, "CHAIN" => 1, "CHAR" => 1,
  "CHARACTER" => 1, "CHARACTERISTICS" => 1, "CHARACTER_LENGTH" => 1,
  "CHARACTER_SET_CATALOG" => 1, "CHARACTER_SET_NAME" => 1,
  "CHARACTER_SET_SCHEMA" => 1, "CHAR_LENGTH" => 1, "CHECK" => 1,
  "CHECKED" => 1, "CHECKPOINT" => 1, "CLASS" => 1, "CLASS_ORIGIN" => 1,
  "CLOB" => 1, "CLOSE" => 1, "CLUSTER" => 1, "COALESCE" => 1,
  "COBOL" => 1, "COLLATE" => 1, "COLLATION" => 1,
  "COLLATION_CATALOG" => 1, "COLLATION_NAME" => 1,
  "COLLATION_SCHEMA" => 1, "COLUMN" => 1, "COLUMN_NAME" => 1,
  "COMMAND_FUNCTION" => 1, "COMMAND_FUNCTION_CODE" => 1, "COMMENT" => 1,
  "COMMIT" => 1, "COMMITTED" => 1, "COMPLETION" => 1,
  "CONDITION_NUMBER" => 1, "CONNECT" => 1, "CONNECTION" => 1,
  "CONNECTION_NAME" => 1, "CONSTRAINT" => 1, "CONSTRAINTS" => 1,
  "CONSTRAINT_CATALOG" => 1, "CONSTRAINT_NAME" => 1,
  "CONSTRAINT_SCHEMA" => 1, "CONSTRUCTOR" => 1, "CONTAINS" => 1,
  "CONTINUE" => 1, "CONVERSION" => 1, "CONVERT" => 1, "COPY" => 1,
  "CORRESPONDING" => 1, "COUNT" => 1, "CREATE" => 1, "CREATEDB" => 1,
  "CREATEUSER" => 1, "CROSS" => 1, "CUBE" => 1, "CURRENT" => 1,
  "CURRENT_DATE" => 1, "CURRENT_PATH" => 1, "CURRENT_ROLE" => 1,
  "CURRENT_TIME" => 1, "CURRENT_TIMESTAMP" => 1, "CURRENT_USER" => 1,
  "CURSOR" => 1, "CURSOR_NAME" => 1, "CYCLE" => 1,
  "DATA" => 1, "DATABASE" => 1, "DATE" => 1,
  "DATETIME_INTERVAL_CODE" => 1, "DATETIME_INTERVAL_PRECISION" => 1,
  "DAY" => 1, "DEALLOCATE" => 1, "DEC" => 1, "DECIMAL" => 1,
  "DECLARE" => 1, "DEFAULT" => 1, "DEFAULTS" => 1, "DEFERRABLE" => 1,
  "DEFERRED" => 1, "DEFINED" => 1, "DEFINER" => 1, "DELETE" => 1,
  "DELIMITER" => 1, "DELIMITERS" => 1, "DEPTH" => 1, "DEREF" => 1,
  "DESC" => 1, "DESCRIBE" => 1, "DESCRIPTOR" => 1, "DESTROY" => 1,
  "DESTRUCTOR" => 1, "DETERMINISTIC" => 1, "DIAGNOSTICS" => 1,
  "DICTIONARY" => 1, "DISCONNECT" => 1, "DISPATCH" => 1, "DISTINCT" => 1,
  "DO" => 1, "DOMAIN" => 1, "DOUBLE" => 1, "DROP" => 1, "DYNAMIC" => 1,
  "DYNAMIC_FUNCTION" => 1, "DYNAMIC_FUNCTION_CODE" => 1,
  "EACH" => 1, "ELSE" => 1, "ENCODING" => 1, "ENCRYPTED" => 1,
  "END" => 1, "END-EXEC" => 1, "EQUALS" => 1, "ESCAPE" => 1,
  "EVERY" => 1, "EXCEPT" => 1, "EXCEPTION" => 1, "EXCLUDING" => 1,
  "EXCLUSIVE" => 1, "EXEC" => 1, "EXECUTE" => 1, "EXISTING" => 1,
  "EXISTS" => 1, "EXPLAIN" => 1, "EXTERNAL" => 1, "EXTRACT" => 1,
  "FALSE" => 1, "FETCH" => 1, "FINAL" => 1, "FIRST" => 1, "FLOAT" => 1,
  "FOR" => 1, "FORCE" => 1, "FOREIGN" => 1, "FORTRAN" => 1,
  "FORWARD" => 1, "FOUND" => 1, "FREE" => 1, "FREEZE" => 1, "FROM" => 1,
  "FULL" => 1, "FUNCTION" => 1,
  "G" => 1, "GENERAL" => 1, "GENERATED" => 1, "GET" => 1, "GLOBAL" => 1,
  "GO" => 1, "GOTO" => 1, "GRANT" => 1, "GRANTED" => 1, "GROUP" => 1,
  "GROUPING" => 1,
  "HANDLER" => 1, "HAVING" => 1, "HIERARCHY" => 1, "HOLD" => 1,
  "HOST" => 1, "HOUR" => 1,
  "IDENTITY" => 1, "IGNORE" => 1, "ILIKE" => 1, "IMMEDIATE" => 1,
  "IMMUTABLE" => 1, "IMPLEMENTATION" => 1, "IMPLICIT" => 1, "IN" => 1,
  "INCLUDING" => 1, "INCREMENT" => 1, "INDEX" => 1, "INDICATOR" => 1,
  "INFIX" => 1, "INHERITS" => 1, "INITIALIZE" => 1, "INITIALLY" => 1,
  "INNER" => 1, "INOUT" => 1, "INPUT" => 1, "INSENSITIVE" => 1,
  "INSERT" => 1, "INSTANCE" => 1, "INSTANTIABLE" => 1, "INSTEAD" => 1,
  "INT" => 1, "INTEGER" => 1, "INTERSECT" => 1, "INTERVAL" => 1,
  "INTO" => 1, "INVOKER" => 1, "IS" => 1, "ISNULL" => 1,
  "ISOLATION" => 1, "ITERATE" => 1,
  "JOIN" => 1,
  "K" => 1, "KEY" => 1, "KEY_MEMBER" => 1, "KEY_TYPE" => 1,
  "LANCOMPILER" => 1, "LANGUAGE" => 1, "LARGE" => 1, "LAST" => 1,
  "LATERAL" => 1, "LEADING" => 1, "LEFT" => 1, "LENGTH" => 1,
  "LESS" => 1, "LEVEL" => 1, "LIKE" => 1, "LIMIT" => 1, "LISTEN" => 1,
  "LOAD" => 1, "LOCAL" => 1, "LOCALTIME" => 1, "LOCALTIMESTAMP" => 1,
  "LOCATION" => 1, "LOCATOR" => 1, "LOCK" => 1, "LOWER" => 1,
  "M" => 1, "MAP" => 1, "MATCH" => 1, "MAX" => 1, "MAXVALUE" => 1,
  "MESSAGE_LENGTH" => 1, "MESSAGE_OCTET_LENGTH" => 1,
  "MESSAGE_TEXT" => 1, "METHOD" => 1, "MIN" => 1, "MINUTE" => 1,
  "MINVALUE" => 1, "MOD" => 1, "MODE" => 1, "MODIFIES" => 1,
  "MODIFY" => 1, "MODULE" => 1, "MONTH" => 1, "MORE" => 1, "MOVE" => 1,
  "MUMPS" => 1, "NAME" => 1, "NAMES" => 1,
  "NATIONAL" => 1, "NATURAL" => 1, "NCHAR" => 1, "NCLOB" => 1,
  "NEW" => 1, "NEXT" => 1, "NO" => 1, "NOCREATEDB" => 1,
  "NOCREATEUSER" => 1, "NONE" => 1, "NOT" => 1, "NOTHING" => 1,
  "NOTIFY" => 1, "NOTNULL" => 1, "NULL" => 1, "NULLABLE" => 1,
  "NULLIF" => 1, "NUMBER" => 1, "NUMERIC" => 1,
  "OBJECT" => 1, "OCTET_LENGTH" => 1, "OF" => 1, "OFF" => 1,
  "OFFSET" => 1, "OIDS" => 1, "OLD" => 1, "ON" => 1, "ONLY" => 1,
  "OPEN" => 1, "OPERATION" => 1, "OPERATOR" => 1, "OPTION" => 1,
  "OPTIONS" => 1, "OR" => 1, "ORDER" => 1, "ORDINALITY" => 1,
  "OUT" => 1, "OUTER" => 1, "OUTPUT" => 1, "OVERLAPS" => 1,
  "OVERLAY" => 1, "OVERRIDING" => 1, "OWNER" => 1,
  "PAD" => 1, "PARAMETER" => 1, "PARAMETERS" => 1, "PARAMETER_MODE" => 1,
  "PARAMETER_NAME" => 1, "PARAMETER_ORDINAL_POSITION" => 1,
  "PARAMETER_SPECIFIC_CATALOG" => 1, "PARAMETER_SPECIFIC_NAME" => 1,
  "PARAMETER_SPECIFIC_SCHEMA" => 1, "PARTIAL" => 1, "PASCAL" => 1,
  "PASSWORD" => 1, "PATH" => 1, "PENDANT" => 1, "PLACING" => 1,
  "PLI" => 1, "POSITION" => 1, "POSTFIX" => 1, "PRECISION" => 1,
  "PREFIX" => 1, "PREORDER" => 1, "PREPARE" => 1, "PRESERVE" => 1,
  "PRIMARY" => 1, "PRIOR" => 1, "PRIVILEGES" => 1, "PROCEDURAL" => 1,
  "PROCEDURE" => 1, "PUBLIC" => 1,
  "READ" => 1, "READS" => 1, "REAL" => 1, "RECHECK" => 1,
  "RECURSIVE" => 1, "REF" => 1, "REFERENCES" => 1, "REFERENCING" => 1,
  "REINDEX" => 1, "RELATIVE" => 1, "RENAME" => 1, "REPEATABLE" => 1,
  "REPLACE" => 1, "RESET" => 1, "RESTART" => 1, "RESTRICT" => 1,
  "RESULT" => 1, "RETURN" => 1, "RETURNED_LENGTH" => 1,
  "RETURNED_OCTET_LENGTH" => 1, "RETURNED_SQLSTATE" => 1, "RETURNS" => 1,
  "REVOKE" => 1, "RIGHT" => 1, "ROLE" => 1, "ROLLBACK" => 1,
  "ROLLUP" => 1, "ROUTINE" => 1, "ROUTINE_CATALOG" => 1,
  "ROUTINE_NAME" => 1, "ROUTINE_SCHEMA" => 1, "ROW" => 1, "ROWS" => 1,
  "ROW_COUNT" => 1, "RULE" => 1,
  "SAVEPOINT" => 1, "SCALE" => 1, "SCHEMA" => 1, "SCHEMA_NAME" => 1,
  "SCOPE" => 1, "SCROLL" => 1, "SEARCH" => 1, "SECOND" => 1,
  "SECTION" => 1, "SECURITY" => 1, "SELECT" => 1, "SELF" => 1,
  "SENSITIVE" => 1, "SEQUENCE" => 1, "SERIALIZABLE" => 1,
  "SERVER_NAME" => 1, "SESSION" => 1, "SESSION_USER" => 1,
  "SET" => 1, "SETOF" => 1, "SETS" => 1, "SHARE" => 1, "SHOW" => 1,
  "SIMILAR" => 1, "SIMPLE" => 1, "SIZE" => 1, "SMALLINT" => 1,
  "SOME" => 1, "SOURCE" => 1, "SPACE" => 1, "SPECIFIC" => 1,
  "SPECIFICTYPE" => 1, "SPECIFIC_NAME" => 1, "SQL" => 1, "SQLCODE" => 1,
  "SQLERROR" => 1, "SQLEXCEPTION" => 1, "SQLSTATE" => 1,
  "SQLWARNING" => 1, "STABLE" => 1, "START" => 1, "STATE" => 1,
  "STATEMENT" => 1, "STATIC" => 1, "STATISTICS" => 1, "STDIN" => 1,
  "STDOUT" => 1, "STORAGE" => 1, "STRICT" => 1, "STRUCTURE" => 1,
  "STYLE" => 1, "SUBCLASS_ORIGIN" => 1, "SUBLIST" => 1, "SUBSTRING" => 1,
  "SUM" => 1, "SYMMETRIC" => 1, "SYSID" => 1, "SYSTEM" => 1,
  "SYSTEM_USER" => 1,
  "TABLE" => 1, "TABLE_NAME" => 1, "TEMP" => 1, "TEMPLATE" => 1,
  "TEMPORARY" => 1, "TERMINATE" => 1, "THAN" => 1, "THEN" => 1,
  "TIME" => 1, "TIMESTAMP" => 1, "TIMEZONE_HOUR" => 1,
  "TIMEZONE_MINUTE" => 1, "TO" => 1, "TOAST" => 1, "TRAILING" => 1,
  "TRANSACTION" => 1, "TRANSACTIONS_COMMITTED" => 1,
  "TRANSACTIONS_ROLLED_BACK" => 1, "TRANSACTION_ACTIVE" => 1,
  "TRANSFORM" => 1, "TRANSFORMS" => 1, "TRANSLATE" => 1,
  "TRANSLATION" => 1, "TREAT" => 1, "TRIGGER" => 1,
  "TRIGGER_CATALOG" => 1, "TRIGGER_NAME" => 1, "TRIGGER_SCHEMA" => 1,
  "TRIM" => 1, "TRUE" => 1, "TRUNCATE" => 1, "TRUSTED" => 1, "TYPE" => 1,
  "UNCOMMITTED" => 1, "UNDER" => 1, "UNENCRYPTED" => 1, "UNION" => 1,
  "UNIQUE" => 1, "UNKNOWN" => 1, "UNLISTEN" => 1, "UNNAMED" => 1,
  "UNNEST" => 1, "UNTIL" => 1, "UPDATE" => 1, "UPPER" => 1, "USAGE" => 1,
  "USER" => 1, "USER_DEFINED_TYPE_CATALOG" => 1,
  "USER_DEFINED_TYPE_NAME" => 1, "USER_DEFINED_TYPE_SCHEMA" => 1,
  "USING" => 1,
  "VACUUM" => 1, "VALID" => 1, "VALIDATOR" => 1, "VALUE" => 1,
  "VALUES" => 1, "VARCHAR" => 1, "VARIABLE" => 1, "VARYING" => 1,
  "VERBOSE" => 1, "VERSION" => 1, "VIEW" => 1, "VOLATILE" => 1,
  "WHEN" => 1, "WHENEVER" => 1, "WHERE" => 1, "WITH" => 1,
  "WITHOUT" => 1, "WORK" => 1, "WRITE" => 1,
  "YEAR" => 1,
  "ZONE" => 1,
);
 
 
GetOptions("debug", "help", "schema=s" => \$SCHEMA);
 
$DEBUG = $opt_debug || 0;
$HELP = $opt_help || 0;
 
binmode STDIN;
binmode STDOUT;
 
my $start = time();
 
print "--\n";
print "-- Generated from mysql2pgsql.perl\n";
print "-- http://gborg.postgresql.org/project/mysql2psql/\n";
print "-- (c) 2004, PostgreSQL Inc.\n";
print "--\n";
print "\n";
print "-- errors are generated for drop tables if they do not exist\n";
print "-- please see http://archives.postgresql.org/pgsql-novice/2004-10/msg00158.php\n";
 
if ($SCHEMA ) {
    print "set search_path='" . $SCHEMA . "'\\g\n" ;
}
 
while(<>) {
    s/`//g;  #  pgsql uses no backticks to denote table name (create table `sd`) or around field names like  mysql
	# comments or empty lines
	if (/^#/ || /^$/ || /^--/) {
		print $_;
		next;
	}
	if (/^USE\s*([^;]*);/) {
		print "\\c ". $1;
		next;
	}
 
	if ($create_sql ne "") { 		# we are inside create table statement so lets process datatypes
            if (/(constraint .*? foreign key .*)/i) {
		s/,$//g;
		$foreign_key_create_sql .= "alter table $table add $_;\n";
	        next;
	    }
		if (/\);/i) {	# end of create table squence
			$create_sql =~ s/,$//g;	# strip last , inside create table
			$create_sql .= $_;
			&print_table_def();
			print ("begin;\n");
			$transaction = 1;
		  next;
        } elsif (/\).*;/i) { # end of create table sequence
            s/Type=[^;\s]+//i; # ISAM ,   # older versions
            s/DEFAULT CHARSET=[^;\s]+//i;   #  my mysql version is 4.1.11
            s/ENGINE=[^;\s]+//i;   #  my mysql version is 4.1.11
	    s/PARTITION BY LINEAR KEY \(.*\) PARTITIONS \d+//i;
	    s/AUTO_INCREMENT=(\d+)//i;
			$create_sql =~ s/,$//g;	# strip last , inside create table
			$create_sql .= $_;
			&print_table_def();
			print ("begin;\n");
			$transaction = 1;
			next;
		}
# A SET is a string object that can have zero or more values, each of which must be chosen from a list of allowed values specified when the table is created. SET column values that consist of multiple set members are specified with members separated by commas (‘,’). A consequence of this is that SET member values should not themselves contain commas.
 
#For example, a column specified as SET('one', 'two') NOT NULL can have any of these values:
 
#''
#'one'
#'two'
#'one,two'
		if (/(\w*)\s+set\(((?:['"]\w+['"]\s*,)+['"]\w+['"])\)(.*)$/i) { # example:  `au_auth` set('r','w','d') NOT NULL default '',
			$column_name=$1;
			$constraints{$column_name}{'values'} = $2;  # 'abc','def', ...
			$constraints{$column_name}{'type'} = "set";  # 'abc','def', ...
			$_ =  qq~ $1 varchar CONSTRAINT ${table}_${column_name}_constraint REFERENCES ${table}_${column_name}_constraint_table (${column_name}) $3\n~;
		} elsif (/(\w*)\s+enum\(((?:['"]\w+['"]\s*,)+['"]\w+['"])\)(.*)$/i) { # enum handling
			$column_name=$1;
			$constraints{$column_name}{'values'} = $2;  # 'abc','def', ...
			$constraints{$column_name}{'type'} = "enum";  # 'abc','def', ...
			$_ =  qq~ $1 varchar CONSTRAINT ${table}_${column_name}_constraint REFERENCES ${table}_${column_name}_constraint_table (${column_name}) $3\n~;
		} elsif (/(\w*)\s+year\(4\)(.*)$/i) { # can be integer OR string 1901-2155
			$column_name=$1;
			my @year_holder = ();
			my $year;
			for (1901 .. 2155) {
					$year = "'$_'";
				unless ($year =~ /2155/) { $year .= ','; }
				 push( @year_holder, $year);
			}
			$constraints{$column_name}{'values'} = join('','',@year_holder);   # '1901','1902', ...
			$constraints{$column_name}{'type'} = "year";  # 'abc','def', ...
			$_ =  qq~ $1 varchar CONSTRAINT ${table}_${column_name}_constraint REFERENCES ${table}_${column_name}_constraint_table (${column_name}) $2\n~;
		} elsif (/(\w*)\s+year\(2\)(.*)$/i) { # can be integer OR string 1901-2155
			$column_name=$1;
			my @year_holder = ();
			my $year;
			for (1970 .. 2069) {
				$year = "'$_'";
				if ($year =~ /2069/) { next; }
				push( @year_holder, $year);
			}
			push( @year_holder, '0000');
			$constraints{$column_name}{'values'} = join(',',@year_holder);   # '1971','1972', ...
			$constraints{$column_name}{'type'} = "year";  # 'abc','def', ...
			$_ =  qq~ $1 varchar CONSTRAINT ${table}_${column_name}_constraint REFERENCES ${table}_${column_name}_constraint_table (${column_name}) $2\n~;
		} elsif (/^[\s\t]*(\w*)\s*.*int.*auto_increment/i) { 		# int,auto_increment -> serial
            $seq = qq~${table}_seq~;
            #  Note:  Before PostgreSQL 8.1, the arguments of the sequence functions were of type text, not regclass, and the above-described conversion from a text string to an OID value would happen at run time during each call. For backwards compatibility, this facility still exists, but internally it is now handled as an implicit coercion from text to regclass before the function is invoked.  (source: 8.1.3 manual, section 9.12)
            s/([\s\t]*)([a-zA-Z_0-9]*)\s*.*int.*auto_increment[^,]*/$1"$2" integer DEFAULT nextval('$seq'::text) NOT NULL/ig;
            $pre_create_sql.="create sequence $seq \;\n";
            $index_create_sql .= "select setval('$seq', (select max($2) from $table));\n";
$create_sql.=$_;
next;
 
		# int type conversion
		} elsif (/(\w*)int\(\d+\)/i) {
			$size=$1;
			$size =~ tr [A-Z] [a-z];
			if ($size eq "tiny" || $size eq "small") {
				$out = "int2";
			} elsif ($size eq "big") {
				$out = "int8";
			} else {
				$out = "int4";
			}
			s/\w*int\(\d+\)/$out/g;
		}
 
		# nuke int unsigned
		s/(int\w+)\s+unsigned/$1/gi;
 
		# blob -> text
        #  mysql docs: A BLOB is a binary large object that can hold a variable amount of data.
		s/\w*blob/bytea/gi;
		# tinytext/mediumtext -> text
		s/tinytext/text/gi;
		s/mediumtext/text/gi;
		s/character set utf8 collate utf8_unicode_ci//gi;
 
		# char -> varchar
		# PostgreSQL would otherwise pad with spaces as opposed
		# to MySQL! Your user interface may depend on this!
		s/\s+char/ varchar/gi;
 
		# nuke date representation (not supported in PostgreSQL)
        if (m/datetime/) {
            s/datetime NOT NULL default '0000-00-00 00:00:00'/timestamp without time zone default NULL/i;
            s/datetime default '([^']+)'/timestamp without time zone DEFAULT '$1'/i;  #  `doe` datetime default '2006-03-23 22:50:04',
            s/datetime /timestamp without time zone /i;
            s/date default '[^']+'/timestamp without time zone/i;
            s/time default '[^']+'/timestamp without time zone/i;
 
            # change not null datetime field to null valid ones
            # (to support remapping of "zero time" to null
            s/datetime not null/timestamp without time zone/i;
        }
	s/'0000-00-00'/null/i;
 
		# nuke size of timestamp
		s/timestamp\([^)]*\)/timestamp/i;
 
		# double -> float8
		s/double\([^)]*\)/float8/i;
 
		# add unique to definition of type (MySQL separates this)
		if (/unique \w+ \((\w+)\)/i) {
			$create_sql.=~s/($1)([^,]+)/$1$2 unique/i;
			next;
		}
        #There are three types of indexes (keys) in MySQL (plus fulltext),
 
		# FIXED (?) : unique for multipe columns (col1,col2) are unsupported!
        # unique constraint w/ multipe columns (col1,col2) in Pg 7.4
 
        if ( !/$mysql_datatypesStr.*key/i) {
            if ( /^\s+fulltext key\s+/i) {  # that is key as a word in the first check for a match
                # the tsvector datatype is made for these types of things
                  # example mysql file:
                  #  `commenttext` text NOT NULL,
                  #  `deleted` enum('true','false') NOT NULL default 'false',
                  #  `onhold` tinyint(1) NOT NULL default '0',
                  #  PRIMARY KEY (`commentid`),
                  #  FULLTEXT KEY `commenttext` (`commenttext`)
                #  what is tsvector datatype?
                #  http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/tsearch-V2-intro.html
                next;
            } elsif ( /^\s+unique key\s+/i ) {
                s/unique key .* (\(.*\))/UNIQUE $1/i;
		$create_sql .= $_;
                next;
            } elsif ( /^\s+primary key\s+/i ) {
                chomp($_);
		s/,$//;
		$index_create_sql .= "ALTER TABLE $table ADD constraint ".$table."_pkey $_;\n";
                next;
            } elsif (/\bkey\b/i ) {
                #regular key: allows null=YES, allows duplicates=YES
                # MYSQL:   KEY is normally a synonym for INDEX.
                # example:   KEY `idx_mod_english_def_word` (`word`),
                m/key (.*?) (\(.*\))/i;		 # hack off name of the non-primary key
                #  * MySQL: ALTER TABLE {$table} ADD KEY $column ($column)
                #  * PostgreSQL: CREATE INDEX {$table}_$column_idx ON {$table}($column) // Please note the _idx "extension"
                #    PRIMARY KEY (`postid`),
                #    KEY `ownerid` (`ownerid`)
                # create an index for everything which has a key listed for it.
                $index_create_sql.="CREATE INDEX ${table}_$1 ON $table $2\;\n\n";
                # just create index do not add to create table statement
                next;
            }
 
        } elsif (/$mysql_datatypesStr.*key/i) {
            $index = qq~${table}_${1}_idx~;   #$1 is the type of column
            $index_create_sql.="CREATE INDEX $index ON ${table}  \;\n\n";
            $create_sql.=$_;
        }
        # TODO: create a trigger for this
        s/on update CURRENT_TIMESTAMP//;
 
        # quote column names
        if ( /^(\s*)(\S+)(\s*)/i and !$pg_keywords{uc $2}) {
            s/^(\s*)(\S+)(\s*)/$1"$2"$3/i;
        }
		$create_sql.=$_;
		} #  END of if ($create_sql ne "") i.e. were inside create table statement so processed datatypes
	else {	# not inside create table
	  s/,0x(,|\))/,NULL$1/g;
	  s/,(0x[a-f0-9]{2,})/toOctets($1)/ige;
		#---- fix data in inserted data: (from MS world)
		# FIX: disabled for now
		if (00 && /insert into/i) {
			s!\x96!-!g;	# --
			s!\x93!"!g;	# ``
			s!\x94!"!g;	# ''
			s!\x85!... !g;	# \ldots
			s!\x92!`!g;
		}
 
		# fix dates '0000-00-00 00:00:00' (should be null)
		s/'0000-00-00 00:00:00'/null/gi;
		s/'0000-00-00'/null/gi;
		s/'00:00:00'/null/gi;
		# s/([12]\d\d\d)([01]\d)([0-3]\d)([0-2]\d)([0-6]\d)([0-6]\d)/'$1-$2-$3 $4:$5:$6'/;
 
		# protect ; in inserts
		#while (/('[^']*);([^']*)'/) {
		#	s/('[^']*);([^']*')/$1 _dotcol_ $2/g;
		#}
 
        #  DROP TABLE IF EXISTS `english_english`;
		if (/drop\s+table\s+if\s+exists\s+(\w+)/i) {
			next;
            s/`//g;  #  pgsql uses no backticks to denote table name like  mysql
            # remove "if exists" from "drop table" if used
            #  to drop a table that is referenced by a view or a foreign-key constraint of another table, CASCADE must be specified. (CASCADE will remove a dependent view entirely, but in the foreign-key case it will only remove the foreign-key constraint, not the other table entirely.)  (source: 8.1.3 docs, section "drop table")
            s/drop\s+table\s+if\s+exists\s+(\w+)/DROP TABLE $1 CASCADE/gi;
        }
 
 
		if (/^create table/i) {
			if ($transaction) {
				print "\ncommit;\n";
				print STDERR "Table $table: ".(time() - $table_start_time)." seconds\n";
				$table_start_time = time();
				$transaction = 0;
			}
            #  CREATE TABLE `english_english` (
            s/`//g;  #  pgsql uses no backticks to denote table name like mysql
			$create_sql = $_;
			/create table (.+?) /i;
			$table=$1;
		} else {
			print $_;
		}
	} # end of if inside create_table
} # END while(<>)
 
if ($transaction) {
	print "commit;";
	$transaction = 0;
}
 
print $index_create_sql;
 
print $foreign_key_create_sql;
 
print STDERR "Total time: ".(time() - $start)." seconds\n";
mysql2pgsql.txt · Last modified: 2019/06/12 16:08 by 127.0.0.1