standard unambiguos format [R] MySQL imported data - mysql

OK, to set the scene, I have written a function to import multiple tables from MySQL (using RODBC) and run randomForest() on them.
This function is run on multiple databases (as separate instances).
In one particular database, and one particular table, the "error in as.POSIXlt.character(x, tz,.....): character string not in a standard unambiguous format" error is thrown. The function runs on around 150 tables across two databases without any issues except this one table.
Here is a head() print from the table:
MQLTime bar5 bar4 bar3 bar2 bar1 pat1 baXRC
1 2014-11-05 23:35:00 184 24 8 24 67 147 Flat
2 2014-11-05 23:57:00 203 184 204 67 51 147 Flat
3 2014-11-06 00:40:00 179 309 49 189 75 19 Flat
4 2014-11-06 00:46:00 28 192 60 49 152 147 Flat
5 2014-11-06 01:20:00 309 48 9 11 24 19 Flat
6 2014-11-06 01:31:00 24 177 64 152 188 19 Flat
And here is the function:
GenerateRF <- function(db, countstable, RFcutoff) {
'load required libraries'
library(RODBC)
library(randomForest)
library(caret)
library(ff)
library(stringi)
'connection and data preparation'
connection <- odbcConnect ('TTODBC', uid='root', pwd='password', case="nochange")
'import count table and check if RF is allowed to be built'
query.str <- paste0 ('select * from ', db, '.', countstable, ' order by RowCount asc')
row.counts <- sqlQuery (connection, query.str)
'Operate only on tables that have >= RFcutoff'
for (i in 1:nrow (row.counts)) {
table.name <- as.character (row.counts[i,1])
col.count <- as.numeric (row.counts[i,2])
row.count <- as.numeric (row.counts[i,3])
if (row.count >= 20) {
'Delete old RFs and DFs for input pattern'
if (file.exists (paste0 (table.name, '_RF.Rdata'))) {
file.remove (paste0 (table.name, '_RF.Rdata'))
}
if (file.exists (paste0 (table.name, '_DF.Rdata'))) {
file.remove (paste0 (table.name, '_DF.Rdata'))
}
'import and clean data'
query.str2 <- paste0 ('select * from ', db, '.', table.name, ' order by mqltime asc')
raw.data <- sqlQuery(connection, query.str2)
'partition data into training/test sets'
set.seed(489)
index <- createDataPartition(raw.data$baXRC, p=0.66, list=FALSE, times=1)
data.train <- raw.data [index,]
data.test <- raw.data [-index,]
'find optimal trees to grow (without outcome and dates)
data.mtry <- as.data.frame (tuneRF (data.train [, c(-1,-col.count)], data.train$baXRC, ntreetry=100,
stepFactor=.5, improve=0.01, trace=TRUE, plot=TRUE, dobest=FALSE))
best.mtry <- data.mtry [which (data.mtry[,2] == min (data.mtry[,2])), 1]
'compress df'
data.ff <- as.ffdf (data.train)
'run RF. Originally set to 1000 trees but M1 dataset is to large for laptop. Maybe train at the lab?'
data.rf <- randomForest (baXRC~., data=data.ff[,-1], mtry=best.mtry, ntree=500, keep.forest=TRUE,
importance=TRUE, proximity=FALSE)
'generate and print variable importance plot'
varImpPlot (data.rf, main = table.name)
'predict on test data'
data.test.pred <- as.data.frame( predict (data.rf, data.test, type="prob"))
'get dates and name date column'
data.test.dates <- data.frame (data.test[,1])
colnames (data.test.dates) <- 'MQLTime'
'attach dates to prediction df'
data.test.res <- cbind (data.test.dates, data.test.pred)
'force date coercion to attempt negating unambiguous format error '
data.test.res$MQLTime <- format(data.test.res$MQLTime, format = "%Y-%m-%d %H:%M:%S")
'delete row names, coerce to dataframe, generate row table name and export outcomes to MySQL'
rownames (data.test.res)<-NULL
data.test.res <- as.data.frame (data.test.res)
root.table <- stri_sub(table.name, 0, -5)
sqlUpdate (connection, data.test.res, tablename = paste0(db, '.', root.table, '_outcome'), index = "MQLTime")
'save RF and test df/s for future use; save latest version of row_counts to MQL4 folder'
save (data.rf, file = paste0 ("C:/Users/user/Documents/RF_test2/", table.name, '_RF.Rdata'))
save (data.test, file = paste0 ("C:/Users/user/Documents/RF_test2/", table.name, '_DF.Rdata'))
write.table (row.counts, paste0("C:/Users/user/AppData/Roaming/MetaQuotes/Terminal/71FA4710ABEFC21F77A62A104A956F23/MQL4/Files/", db, "_m1_rowcounts.csv"), sep = ",", col.names = F,
row.names = F, quote = F)
'end of conditional block'
}
'end of for loop'
}
'close all connection to MySQL'
odbcCloseAll()
'clear workspace'
rm(list=ls())
'end of function'
}
At this line:
data.test.res$MQLTime <- format(data.test.res$MQLTime, format = "%Y-%m-%d %H:%M:%S")
I have tried coercing MQLTime using various functions including: as.character(), as.POSIXct(), as.POSIXlt(), as.Date(), format(), as.character(as.Date())
and have also tried:
"%y" vs "%Y" and "%OS" vs "%S"
All variants seem to have no effect on the error and the function is still able to run on all other tables. I have checked the table manually (which contains almost 1500 rows) and also in MySQL looking for NULL dates or dates like "0000-00-00 00:00:00".
Also, if I run the function line by line in R terminal, this offending table is processed without any problems which just confuses the hell out me.
I've exhausted all the functions/solutions I can think of (and also all those I could find through Dr. Google) so I am pleading for help here.
I should probably mention that the MQLTime column is stored as varchar() in MySQL. This was done to try and get around issues with type conversions between R and MySQL
SHOW VARIABLES LIKE "%version%";
innodb_version, 5.6.19
protocol_version, 10
slave_type_conversions,
version, 5.6.19
version_comment, MySQL Community Server (GPL)
version_compile_machine, x86
version_compile_os, Win32
> sessionInfo()
R version 3.0.2 (2013-09-25)
Platform: i386-w64-mingw32/i386 (32-bit)
Edit: Str() output on the data as imported from MySQl showing MQLTime is already in POSIXct format:
> str(raw.data)
'data.frame': 1472 obs. of 8 variables:
$ MQLTime: POSIXct, format: "2014-11-05 23:35:00" "2014-11-05 23:57:00" "2014-11-06 00:40:00" "2014-11-06 00:46:00" ...
$ bar5 : int 184 203 179 28 309 24 156 48 309 437 ...
$ bar4 : int 24 184 309 192 48 177 48 68 60 71 ...
$ bar3 : int 8 204 49 60 9 64 68 27 192 147 ...
$ bar2 : int 24 67 189 49 11 152 27 56 437 67 ...
$ bar1 : int 67 51 75 152 24 188 56 147 71 0 ...
$ pat1 : int 147 147 19 147 19 19 147 19 147 19 ...
$ baXRC : Factor w/ 3 levels "Down","Flat",..: 2 2 2 2 2 2 2 2 2 3 ...
So I have tried declaring stringsAsfactors = FALSE in the dataframe operations and this had no effect.
Interestingly, if the offending table is removed from processing through an additional conditional statement in the first 'if' block, the function stops on the table immediately preceeding the blocked table.
If both the original and the new offending tables are removed from processing, then the function stops on the table immediately prior to them. I have never seen this sort of behavior before and it really has me stumped.
I watched system resources during the function and they never seem to max out.
Could this be a problem with the 'for' loop and not necessarily date formats?

There appears to be some egg on my face. The table following the table where the function was stopping had a row with value '0000-00-00 00:00:00'. I added another statement in my MySQL function to remove these rows when pre-processing the tables. Thanks to those that had a look at this.

Related

Append information in the th tags to td rows

I am an economist struggling with coding and data scraping.
I am scarping data from the main and unique table on this webpage (https://www.oddsportal.com/basketball/europe/euroleague-2013-2014/results/). I can retrieve all the information of the td HTML tags with python selenium by referring to the class element. The same goes for the th tag where it is stored the information of the date and stage of the competition. In my final dataset, I would like to have the information stored in the th tag in two rows (data and stage of the competition) next to the other rows in the table. Basically, for each match, I would like to have the date and the stage of the competition in rows and not as the head of each group of matches.
The only solution I came up with is to index all the rows (with both th and td tags) and build a while loop to append the information in the th tags to the td rows whose index is lower than the next index for the th tag. Hope I made myself clear (if not I will try to give a more graphical explanation). However, I am not able to code such a logic construct due to my poor coding abilities. I do not know if I need two loops to iterate through different tags (td and th) and in case how to do that. If you have any easier solution, it is more than welcome!
Thanks in advance for the precious help!
code below:
from selenium import webdriver
import time
import pandas as pd
# Season to filter
seasons_filt = ['2013-2014', '2014-2015', '2015-2016','2016-2017', '2017-2018', '2018-2019']
# Define empty data
data_keys = ["Season", "Match_Time", "Home_Team", "Away_Team", "Home_Odd", "Away_Odd", "Home_Score",
"Away_Score", "OT", "N_Bookmakers"]
data = dict()
for key in data_keys:
data[key] = list()
del data_keys
# Define 'driver' variable and launch browser
#path = "C:/Users/ALESSANDRO/Downloads/chromedriver_win32/chromedriver.exe"
#path office pc
path = "C:/Users/aldi/Downloads/chromedriver.exe"
driver = webdriver.Chrome(path)
# Loop through pages based on page_num and season
for season_filt in seasons_filt:
page_num = 0
while True:
page_num += 1
# Get url and navigate it
page_str = (1 - len(str(page_num)))* '0' + str(page_num)
url ="https://www.oddsportal.com/basketball/europe/euroleague-" + str(season_filt) + "/results/#/page/" + page_str + "/"
driver.get(url)
time.sleep(3)
# Check if page has no data
if driver.find_elements_by_id("emptyMsg"):
print("Season {} ended at page {}".format(season_filt, page_num))
break
try:
# Teams
for el in driver.find_elements_by_class_name('name.table-participant'):
el = el.text.strip().split(" - ")
data["Home_Team"].append(el[0])
data["Away_Team"].append(el[1])
data["Season"].append(season_filt)
# Scores
for el in driver.find_elements_by_class_name('center.bold.table-odds.table-score'):
el = el.text.split(":")
if el[1][-3:] == " OT":
data["OT"].append(True)
el[1] = el[1][:-3]
else:
data["OT"].append(False)
data["Home_Score"].append(el[0])
data["Away_Score"].append(el[1])
# Match times
for el in driver.find_elements_by_class_name("table-time"):
data["Match_Time"].append(el.text)
# Odds
i = 0
for el in driver.find_elements_by_class_name("odds-nowrp"):
i += 1
if i%2 == 0:
data["Away_Odd"].append(el.text)
else:
data["Home_Odd"].append(el.text)
# N_Bookmakers
for el in driver.find_elements_by_class_name("center.info-value"):
data["N_Bookmakers"].append(el.text)
# TODO think of inserting the dates list in the dataframe even if it has a different size (19 rows and not 50)
except:
pass
driver.quit()
data = pd.DataFrame(data)
data.to_csv("data_odds.csv", index = False)
I would like to add this information to my dataset as two additional rows:
for el in driver.find_elements_by_class_name("first2.tl")[1:]:
el = el.text.strip().split(" - ")
data["date"].append(el[0])
data["stage"].append(el[1])
Few things I would change here.
Don't overwrite variables. You store elements in your el variable, then you over write the element with your strings. It may work for you here, but you may get yourself into trouble with that practice later on, especially since you are iterating through those elements. It makes it hard to debug too.
I know Selenium has ways to parse the html. But I personally feel BeautifulSoup is a tad easier to parse with and is a little more intuitive if you are simply just trying to pull out data from the html. So I went with BeautifulSoup's .find_previous() to get the tags that precede the games, essentially then able to get your date and stage content.
Lastly, I like to construct a list of dictionaries to make up the data frame. Each item in the list is a dictionary key:value where the key is the column name and value is the data. You sort of do the opposite in creating a dictionary of lists. Now there is nothing wrong with that, but if the lists don't have the same length, you're get an error when trying to create the dataframe. Where as with my way, if for what ever reason there is a value missing, it will still create the dataframe, but will just have a null or nan for the missing data.
There may be more work you need to do with the code to go through the pages, but this gets you the data in the form you need.
Code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import time
import pandas as pd
from bs4 import BeautifulSoup
import re
# Season to filter
seasons_filt = ['2013-2014', '2014-2015', '2015-2016','2016-2017', '2017-2018', '2018-2019']
# Define 'driver' variable and launch browser
path = "C:/Users/ALESSANDRO/Downloads/chromedriver_win32/chromedriver.exe"
driver = webdriver.Chrome(path)
rows = []
# Loop through pages based on page_num and season
for season_filt in seasons_filt:
page_num = 0
while True:
page_num += 1
# Get url and navigate it
page_str = (1 - len(str(page_num)))* '0' + str(page_num)
url ="https://www.oddsportal.com/basketball/europe/euroleague-" + str(season_filt) + "/results/#/page/" + page_str + "/"
driver.get(url)
time.sleep(3)
# Check if page has no data
if driver.find_elements_by_id("emptyMsg"):
print("Season {} ended at page {}".format(season_filt, page_num))
break
try:
soup = BeautifulSoup(driver.page_source, 'html.parser')
table = soup.find('table', {'id':'tournamentTable'})
trs = table.find_all('tr', {'class':re.compile('.*deactivate.*')})
for each in trs:
teams = each.find('td', {'class':'name table-participant'}).text.split(' - ')
scores = each.find('td', {'class':re.compile('.*table-score.*')}).text.split(':')
ot = False
for score in scores:
if 'OT' in score:
ot == True
scores = [x.replace('\xa0OT','') for x in scores]
matchTime = each.find('td', {'class':re.compile('.*table-time.*')}).text
# Odds
i = 0
for each_odd in each.find_all('td',{'class':"odds-nowrp"}):
i += 1
if i%2 == 0:
away_odd = each_odd.text
else:
home_odd = each_odd.text
n_bookmakers = soup.find('td',{'class':'center info-value'}).text
date_stage = each.find_previous('th', {'class':'first2 tl'}).text.split(' - ')
date = date_stage[0]
stage = date_stage[1]
row = {'Season':season_filt,
'Home_Team':teams[0],
'Away_Team':teams[1],
'Home_Score':scores[0],
'Away_Score':scores[1],
'OT':ot,
'Match_Time':matchTime,
'Home_Odd':home_odd,
'Away_Odd':away_odd,
'N_Bookmakers':n_bookmakers,
'Date':date,
'Stage':stage}
rows.append(row)
except:
pass
driver.quit()
data = pd.DataFrame(rows)
data.to_csv("data_odds.csv", index = False)
Output:
print(data.head(15).to_string())
Season Home_Team Away_Team Home_Score Away_Score OT Match_Time Home_Odd Away_Odd N_Bookmakers Date Stage
0 2013-2014 Real Madrid Maccabi Tel Aviv 86 98 False 18:00 -667 +493 7 18 May 2014 Final Four
1 2013-2014 Barcelona CSKA Moscow 93 78 False 15:00 -135 +112 7 18 May 2014 Final Four
2 2013-2014 Barcelona Real Madrid 62 100 False 19:00 +134 -161 7 16 May 2014 Final Four
3 2013-2014 CSKA Moscow Maccabi Tel Aviv 67 68 False 16:00 -278 +224 7 16 May 2014 Final Four
4 2013-2014 Real Madrid Olympiacos 83 69 False 18:45 -500 +374 7 25 Apr 2014 Play Offs
5 2013-2014 CSKA Moscow Panathinaikos 74 44 False 16:00 -370 +295 7 25 Apr 2014 Play Offs
6 2013-2014 Olympiacos Real Madrid 71 62 False 18:45 +127 -152 7 23 Apr 2014 Play Offs
7 2013-2014 Maccabi Tel Aviv Olimpia Milano 86 66 False 17:45 -217 +179 7 23 Apr 2014 Play Offs
8 2013-2014 Panathinaikos CSKA Moscow 73 72 False 16:30 -106 -112 7 23 Apr 2014 Play Offs
9 2013-2014 Panathinaikos CSKA Moscow 65 59 False 18:45 -125 +104 7 21 Apr 2014 Play Offs
10 2013-2014 Maccabi Tel Aviv Olimpia Milano 75 63 False 18:15 -189 +156 7 21 Apr 2014 Play Offs
11 2013-2014 Olympiacos Real Madrid 78 76 False 17:00 +104 -125 7 21 Apr 2014 Play Offs
12 2013-2014 Galatasaray Barcelona 75 78 False 17:00 +264 -333 7 20 Apr 2014 Play Offs
13 2013-2014 Olimpia Milano Maccabi Tel Aviv 91 77 False 18:45 -286 +227 7 18 Apr 2014 Play Offs
14 2013-2014 CSKA Moscow Panathinaikos 77 51 False 16:15 -303 +247 7 18 Apr 2014 Play Offs

dbGetQuery select all existing columns

I have a vector of columns that I would like to select from the databases. If the column is missing, I want to select all of the columns that exists. But, I am not sure how to specify this in my query?
For example, to select column drat I specify "SELECT drat FROM mtcars". Let's say my column names are drat and colMissing.
My query does not work "SELECT drat, colMissing FROM mtcars" as Error: no such column: colMissing .
However, I want drat exporting. How can I make sure that all existing columns will be exported, and non existing skipped? In my real data, I have a long vector of columns names and many databases, so I want to do it automatically.
Dummy example:
library(DBI)
con <- dbConnect(RSQLite::SQLite(), ":memory:")
dbWriteTable(con, "mtcars", mtcars)
dbGetQuery(con, "SELECT * FROM mtcars") # select all columns
dbGetQuery(con, "SELECT drat, wt, disp, colMissing FROM mtcars", n = 6) # does not work as contains non existing columns name. How to export only existing ones?
I don't think SQL gives you an easy way to dynamically set the columns to select in this fashion. I think the easiest way to do this type of filtering is to determine the columns to join dynamically and create the query programmatically.
cols <- c("drat", "wt", "disp", "colMissing")
cols_to_select <- intersect(dbListFields(con, "mtcars"), cols)
cols_to_select
# [1] "disp" "drat" "wt"
qry <- paste("select", paste(dbQuoteIdentifier(con, cols_to_select), collapse = ","), "from mtcars")
qry
# [1] "select `disp`,`drat`,`wt` from mtcars"
head(dbGetQuery(con, qry))
# disp drat wt
# 1 160 3.90 2.620
# 2 160 3.90 2.875
# 3 108 3.85 2.320
# 4 258 3.08 3.215
# 5 360 3.15 3.440
# 6 225 2.76 3.460
I'm taking deliberate steps here to mitigate the risk of inadvertent SQL-injection that comes with paste-ing a query together. It is feasible that column names of an existing frame could be rather stupidly-malicious. (And no, I don't think the risk of these names is real, this type of mistake is much more likely to create a syntax error.)
someframe <- data.frame(a=1,b=2)
names(someframe)[1] <- "Robert');DROP TABLE Students;--"
qry <- paste("select", paste(names(someframe), collapse = ","), "from mtcars")
qry
# [1] "select Robert');DROP TABLE Students;--,b from mtcars"
Okay, so that won't work here (despite https://xkcd.com/327/), but ... be careful when forming a query dynamically. dbQuoteIdentifier is one function with the intent of mitigating this risk. With comparison data (e.g., WHERE cyl > 5), it is much better to use parameter-binding (i.e., WHERE cyl > ?); this doesn't work in the SELECT portion, however, so caveat emptor.
As an aside ... I believe SQL-injection discussions normally focus on the parameters (within the WHERE clause) of the query, not on the fields to be selected. However, it is feasible to make this happen with field names, though it requires knowing the target table name in the injection. (I'm using SQL Server below.)
DBI::dbWriteTable(con, "#r2mt", mtcars[1:2,])
DBI::dbGetQuery(con, "select * from #r2mt")
# row_names mpg cyl disp hp drat wt qsec vs am gear carb
# 1 Mazda RX4 21 6 160 110 3.9 2.620 16.46 0 1 4 4
# 2 Mazda RX4 Wag 21 6 160 110 3.9 2.875 17.02 0 1 4 4
names(someframe)[1] <- 'cyl" from #r2mt;DROP TABLE #r2mt;--'
qry <- paste("select", paste(dQuote(names(someframe)), collapse = ", "), "from #r2mt")
qry
# [1] "select \"cyl\" from #r2mt;DROP TABLE #r2mt;--\", \"b\" from #r2mt"
DBI::dbGetQuery(con, qry)
# cyl
# 1 6
# 2 6
DBI::dbGetQuery(con, "select * from #r2mt")
# Error: nanodbc/nanodbc.cpp:1655: 42000: [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Invalid object name '#r2mt'. [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Statement(s) could not be prepared.
# <SQL> 'select * from #r2mt'
I should note that while dQuote did not protect against this, dbQuoteIdentifer did:
DBI::dbWriteTable(con, "#r2mt", mtcars[1:2,])
qry <- paste("select", paste(DBI::dbQuoteIdentifier(con, names(someframe)), collapse = ", "), "from #r2mt")
qry
# [1] "select \"cyl\"\" from #r2mt;DROP TABLE #r2mt;--\", \"b\" from #r2mt"
DBI::dbGetQuery(con, "select * from #r2mt")
# row_names mpg cyl disp hp drat wt qsec vs am gear carb
# 1 Mazda RX4 21 6 160 110 3.9 2.620 16.46 0 1 4 4
# 2 Mazda RX4 Wag 21 6 160 110 3.9 2.875 17.02 0 1 4 4
DBI::dbGetQuery(con, qry)
# Error: nanodbc/nanodbc.cpp:1655: 42000: [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Invalid column name 'cyl" from #r2mt;DROP TABLE #r2mt;--'. [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Invalid column name 'b'. [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Statement(s) could not be prepared.
# <SQL> 'select "cyl"" from #r2mt;DROP TABLE #r2mt;--", "b" from #r2mt'
Where the clear difference in qry is shown here:
# [1] "select \"cyl\" from #r2mt;DROP TABLE #r2mt;--\", \"b\" from #r2mt"
# [1] "select \"cyl\"\" from #r2mt;DROP TABLE #r2mt;--\", \"b\" from #r2mt"
I was unable to defeat dbQuoteIdentifier in order to stop the escaping of " in this use.

decimal in MySQL to R

library(RMySQL)
con <- dbConnect(MySQL(), dbname="hs***", host="localhost",
port = 3306, user="****",
password="****")
dbSendQuery(con, "SEt NAMES euckr")
d <- dbReadTable(con, "accidents")
str(d)
#it results :
$ lat : num 38 38 38 38 38 38 37 38 38 35 ...
$ longt : num 127 127 127 127 127 ...
structure of latitude and longitude in R says num, but I made these columns decimal(9,7) and (11,8) in MYSQL.
I want to know how to fix it.
Also, I got these error codes:
1: In .local(conn, statement, ...) :
Decimal MySQL column 14 imported as numeric
2: In .local(conn, statement, ...) :
Decimal MySQL column 15 imported as numeric
have a look at this sequence as it might be the solution to your problem - R seems to round values for displaying:
y <- "1.1000000001"
y
[1] "1.1000000001"
y1 <- as.double(y)
y1
[1] 1.1
y2 <- y1-1
y2-0.1
[1] 1.000001e-10

Undefined columns selected using panelvar package

Have anyone used panel var in R?
Currently I'm using the package panelvar of R. And I'm getting this error :
Error in `[.data.frame`(data, , c(colnames(data)[panel_identifier], required_vars)) :
undefined columns selected
And my syntax currently is:
model1<-pvargmm(
dependent_vars = c("Change.."),
lags = 2,
exog_vars = c("Price"),
transformation = "fd",
data = base1,
panel_identifier = c("id", "t"),
steps = c("twostep"),
system_instruments = FALSE,
max_instr_dependent_vars = 99,
min_instr_dependent_vars = 2L,
collapse = FALSE)
I don't know why my panel_identifier is not working, it's pretty similar to the example given by panelvar package, however, it doesn't work, I want to appoint that base1 is on data.frame format. any ideas? Also, my data is structured like this:
head(base1)
id t country DDMMYY month month_text day Date_txt year Price Open
1 1 1296 China 1-4-2020 4 Apr 1 Apr 01 2020 12588.24 12614.82
2 1 1295 China 31-3-2020 3 Mar 31 Mar 31 2020 12614.82 12597.61
High Low Vol. Change..
1 12775.83 12570.32 NA -0.0021
2 12737.28 12583.05 NA 0.0014
thanks in advance !
Check the documentation of the package and the SSRN paper. For me it helped to ensure all entered formats are identical (you can check this with str(base1) command). For example they write:
library(panelvar)
data("Dahlberg")
ex1_dahlberg_data <-
pvargmm(dependent_vars = .......
When I look at it I get
>str(Dahlberg)
'data.frame': 2385 obs. of 5 variables:
$ id : Factor w/ 265 levels "114","115","120",..: 1 1 1 1 1 1 1 1 1 2 ...
$ year : Factor w/ 9 levels "1979","1980",..: 1 2 3 4 5 6 7 8 9 1 ...
$ expenditures: num 0.023 0.0266 0.0273 0.0289 0.0226 ...
$ revenues : num 0.0182 0.0209 0.0211 0.0234 0.018 ...
$ grants : num 0.00544 0.00573 0.00566 0.00589 0.00559 ...
For example the input data must be a data.frame (in my case it had additional type specifications like tibble or data.table). I resolved it by casting as.data.frame() on it.

Code Golf: Playing Tetris

Locked. This question and its answers are locked because the question is off-topic but has historical significance. It is not currently accepting new answers or interactions.
The basics:
Consider the following tetrominoes and empty playing field:
0123456789
I O Z T L S J [ ]
[ ]
# ## ## ### # ## # [ ]
# ## ## # # ## # [ ]
# ## ## [ ]
# [ ]
[==========]
The dimensions of the playing field are fixed. The numbers at the top are just here
to indicate the column number (also see input).
Input:
1. You are given a specific playing field (based on the above) which can already be filled partly
with tetrominoes (this can be in a separate file or provided via stdin).
Sample input:
[ ]
[ ]
[ ]
[ ]
[ # # #]
[ ## ######]
[==========]
2. You are given a string which describes (separated by spaces) which tetromino to insert (and
drop down) at which column. Tetrominoes don't need to be rotated. Input can be read from stdin.
Sample input:
T2 Z6 I0 T7
You can assume input is 'well-formed' (or produce undefined behaviour when it's not).
Output
Render the resulting field ('full' lines must disappear) and print the score count
(every dropped line accounts for 10 points).
Sample output based on the sample input above:
[ ]
[ ]
[ ]
[# ###]
[# ### ]
[##### ####]
[==========]
10
Winner:
Shortest solution (by code character count). Usage examples are nice. Have fun golfing!
Edit: added a bounty of +500 reputation to draw some more attention to the nice efforts the answerers already made (and possibly some new solutions to this question)...
GolfScript - 181 characters
Newlines are not necessary. Output is in standard output, although some errors are present in stderr.
\10 should be replaced by the corresponding ASCII character for the program to be 181 characters.
{):X!-{2B{" #"=}%X" ":f*+-1%}%:P;:>.{\!:F;>P{\(#{3&\(#.2$&F|:F;|}%\+}%\+F![f]P+:P
;}do;{"= "&},.,7^.R+:R;[>0="#"/f*]*\+}0"R#1(XBc_""~\10"{base}:B/3/~4*"nIOZTLSJR "
";:"*~;n%)n*~ 10R*+n*
Sample I/O:
$ cat inp
[ ]
[ ]
[ ]
[ ]
[ # # #]
[ ## ######]
[==========]
T2 Z6 I0 T7
$ cat inp|golfscript tetris.gs 2>/dev/null
[ ]
[ ]
[ ]
[# ###]
[# ### ]
[##### ####]
[==========]
10
Tetromino compression:
Pieces are stored as three base 8 digits. This is a simple binary representation, e.g.T=[7,2,0], S=[6,3,0], J=[2,2,3]. [1] is used for the I piece in compression, but this is explicitly set to [1,1,1,1] later (i.e. the 4* in the code). All of these arrays are concatenated into a single array, which is converted into an integer, and then a string (base 126 to minimize non-printable characters, length, and not encounter utf8). This string is very short: "R#1(XBc_".
Decompression is then straightforward. We first do a base 126 conversion followed by a base 8 conversion ("~\10"{base}/, i.e. iterate through "~\10" and do a base conversion for each element). The resulting array is split into groups of 3, the array for I is fixed (3/~4*). We then convert each element to base 2 and (after removing zeros) replace each binary digit with the character of that index in the string " #" (2base{" #"=}%...-1% - note that we need to reverse the array otherwise 2 would become "# " instead of " #").
Board/piece format, dropping pieces
The board is simply an array of strings, one for each line. No work is initially done on this, so we can generate it with n/( on the input. Pieces are also arrays of strings, padded with spaces to the left for their X position, but without trailing spaces. Pieces are dropped by prepending to the array, and continuously testing whether there is a collision.
Collision testing is done by iterating through all characters in the piece, and comparing against the character of the same position on the board. We want to regard #+= and #+# as collisions, so we test whether ((piecechar&3)&boardchar) is nonzero. While doing this iteration, we also update (a copy of) the board with ((piecechar&3)|boardchar), which correctly sets the value for pairs #+, +#, +[. We use this updated board if there is a collision after moving the piece down another row.
Removing filled rows is quite simple. We remove all rows for which "= "& return false. A filled row will have neither = or , so the conjunction will be a blank string, which equates to false. Then we count the number of rows that have been removed, add the count to the score and prepend that many "[ ... ]"s. We generate this compactly by taking the first row of the grid and replacing # with .
Bonus
Since we compute what the board would look like in each position of the piece as it falls, we can keep these on the stack instead of deleting them! For a total of three characters more, we can output all these positions (or two characters if we have the board states single spaced).
{):X!-{2B{" #"=}%X" ":f*+-1%}%:P;:>.{>[f]P+:P(!:F;{\(#{3&\(#.2$&F|:F;|}%\+}%\+F!}
do;{"= "&},.,7^.R+:R;[>0="#"/f*]*\+}0"R#1(XBc_""~\10"{base}:B/3/~4*"nIOZTLSJR "
";:"*~;n%)n*~ ]{n*n.}/10R*
Perl, 586 523 483 472 427 407 404 386 387 356 353 chars
(Needs Perl 5.10 for the defined-or // operator).
Takes all input from stdin. Still needs some serious golfing.
Note that ^Q represents ASCII 17 (DC1/XON), ^C represents ASCII 3 and ^# represents ASCII 0 (NUL).
while(<>){push#A,[split//]if/]/;while(/\w/g){for$i(0..6){for($f=0,$j=4;$j--;){$c=0;map{if($_){$i--,$f=$j=3,redo if$A[$k=$i+$j][$C=$c+$'+1]ne$";$A[$k][$C]="#"if$f}$c++}split//,unpack"b*",chr vec"3^#'^#c^#^Q^C6^#\"^C^Q^Q",index(OTZLSJI,$&)*4+$j,4;$s+=10,#A[0..$k]=#A[$k,0..$k-1],map{s/#/ /}#{$A[0]},$i++if 9<grep/#/,#{$A[$k]}}last if$f}}}print+(map#$_,#A),$s//0,$/
Commented version:
while(<>){
# store the playfield as an AoA of chars
push#A,[split//]if/]/;
# while we're getting pieces
while(/\w/g){
# for each line of playfield
for$i(0..6){
# for each line of current piece
for($f=0,$j=4;$j--;){
# for each column of current piece
$c=0;
map{
if($_){
# if there's a collision, restart loop over piece lines
# with a mark set and playfield line decremented
$i--,$f=$j=3,redo if$A[$k=$i+$j][$C=$c+$'+1]ne$";
# if we already found a collision, draw piece
$A[$k][$C]="#"if$f
}
$c++
# pieces are stored as a bit vector, 16 bits (4x4) per piece,
# expand into array of 1's and 0's
}split//,unpack"b*",chr vec"3^#'^#c^#^Q^C6^#\"^C^Q^Q",index(OTZLSJI,$&)*4+$j,4;
# if this playfield line is full, remove it. Done by array slicing
# and substituting all "#"'s in line 0 with " "'s
$s+=10,#A[0..$k]=#A[$k,0..$k-1],map{s/#/ /}#{$A[0]},$i++if 9<grep/#/,#{$A[$k]}
}
# if we found a collision, stop iterating over the playfield and get next piece from input
last if$f
}
}
}
# print everything
print+(map#$_,#A),$s//0,$/
Edit 1: some serious golfing, fix output bug.
Edit 2: some inlining, merged two loops into one for a net saving of (drum roll...) 3 chars, misc golfing.
Edit 3: some common subexpression elimination, a little constant merging and tweaked a regex.
Edit 4: changed representation of tetrominoes into a packed bit vector, misc golfing.
Edit 5: more direct translation from tetromino letter to array index, use non-printable characters, misc golfing.
Edit 6: fixed bug cleaning top line, introduced in r3 (edit 2), spotted by Nakilon. Use more non-printable chars.
Edit 7: use vec for getting at tetromino data. Take advantage of the fact that the playfield has fixed dimensions. if statement => if modifier, the merging of loops of edit 2 starts paying off. Use // for the 0-score case.
Edit 8: fixed another bug, introduced in r6 (edit 5), spotted by Nakilon.
Edit 9: don't create new references when clearing lines, just move references around via array slicing. Merge two map's into one. Smarter regex. "Smarter" for. Misc golfings.
Edit 10: inlined tetromino array, added commented version.
Ruby — 427 408 398 369 359
t=[*$<]
o=0
u=->f{f.transpose}
a=u[t.reverse.join.scan /#{'( |#)'*10}/]
t.pop.split.map{|w|m=(g='I4O22Z0121T01201L31S1201J13'[/#{w[0]}\d+/].scan(/0?\d/).zip a.drop w[1].to_i).map{|r,b|(b.rindex ?#or-1)-r.size+1}.max
g.map{|r,b|b.fill ?#,m+r.size,r.to_i}
v=u[a]
v.reject!{|i|i-[?#]==[]&&(o+=10;v)<<[' ']*10}
a=u[v]}
puts u[a].reverse.map{|i|?[+i*''+?]},t[-1],o
Bash shell script (301 304 characters)
UPDATE: Fixed a bug involving pieces that extend into the top row. Also, the output is now sent to standard out, and as a bonus, it is possible to run the script again to continue playing a game (in which case you must add up the total score yourself).
This includes nonprintable characters, so I have provided a hex dump. Save it as tetris.txt:
0000000: 7461 696c 202d 3120 245f 7c7a 6361 743e tail -1 $_|zcat>
0000010: 753b 2e20 750a 1f8b 0800 35b0 b34c 0203 u;. u.....5..L..
0000020: 5590 516b 8330 10c7 dff3 296e 4c88 ae64 U.Qk.0....)nL..d
0000030: a863 0c4a f57d 63b0 07f7 b452 88d1 b4da .c.J.}c....R....
0000040: 1a5d 5369 91a6 df7d 899a d05d 5e72 bfbb .]Si...}...]^r..
0000050: fbff 2fe1 45d5 0196 7cff 6cce f272 7c10 ../.E...|.l..r|.
0000060: 387d 477c c4b1 e695 855f 77d0 b29f 99bd 8}G|....._w.....
0000070: 98c6 c8d2 ef99 8eaa b1a5 9f33 6d8c 40ec ...........3m.#.
0000080: 6433 8bc7 eeca b57f a06d 27a1 4765 07e6 d3.......m'.Ge..
0000090: 3240 dd02 3df1 2344 f04a 0d1d c748 0bde 2#..=.#D.J...H..
00000a0: 75b8 ed0f 9eef 7bd7 7e19 dd16 5110 34aa u.....{.~...Q.4.
00000b0: c87b 2060 48a8 993a d7c0 d210 ed24 ff85 .{ `H..:.....$..
00000c0: c405 8834 548a 499e 1fd0 1a68 2f81 1425 ...4T.I....h/..%
00000d0: e047 bc62 ea52 e884 42f2 0f0b 8b37 764c .G.b.R..B....7vL
00000e0: 17f9 544a 5bbd 54cb 9171 6e53 3679 91b3 ..TJ[.T..qnS6y..
00000f0: 2eba c07a 0981 f4a6 d922 89c2 279f 1ab5 ...z....."..'...
0000100: 0656 c028 7177 4183 2040 033f 015e 838b .V.(qwA. #.?.^..
0000110: 0d56 15cf 4b20 6ff3 d384 eaf3 bad1 b9b6 .V..K o.........
0000120: 72be 6cfa 4b2f fb03 45fc cd51 d601 0000 r.l.K/..E..Q....
Then, at the bash command prompt, preferably with elvis rather than vim installed as vi:
$ xxd -r tetris.txt tetris.sh
$ chmod +x tetris.sh
$ cat << EOF > b
> [ ]
> [ ]
> [ ]
> [ ]
> [ # # #]
> [ ## ######]
> [==========]
> EOF
$ ./tetris.sh T2 Z6 I0 T7 2>/dev/null
-- removed stuff that is not in standard out --
[ ]
[ ]
[ ]
[# ###]
[# ### ]
[##### ####]
[==========]
10
How it works
The code self-extracts itself similarly to how executable programs compressed using the gzexe script do. Tetromino pieces are represented as sequences of vi editor commands. Character counting is used to detect collisions, and line counting is used to calculate the score.
The unzipped code:
echo 'rej.j.j.:wq!m'>I
echo '2rejh.:wq!m'>O
echo '2rej.:wq!m'>Z
echo '3rejh1.:wq!m'>T
echo 'rej.j2.:wq!m'>L
echo 'l2rej2h.:wq!m'>S
echo 'lrej.jh2.:wq!m'>J
for t
do for y in `seq 1 5`
do echo -n ${y}jk$((${t:1}+1))l|cat - ${t:0:1}|vi b>0
grep ========== m>0||break
[ `tr -cd '#'<b|wc -c` = `tr -cd '#'<m|wc -c` ]||break
tr e '#'<m>n
done
cat n>b
grep -v '##########' b>m
$((S+=10*(`wc -l < b`-`wc -l < m`)))
yes '[ ]'|head -7|cat - m|tail -7>b
done
cat b
echo $S
The original code before golfing:
#!/bin/bash
mkpieces() {
pieces=('r#j.j.j.' '2r#jh.' '2r#j.' '3r#jh1.' 'r#j.j2.' 'l2r#j2h.' 'lr#j.jh2.')
letters=(I O Z T L S J)
for j in `seq 0 9`; do
for i in `seq 0 6`; do
echo "jk$(($j+1))l${pieces[$i]}:wq! temp" > ${letters[$i]}$j
done
done
}
counthashes() {
tr -cd '#' < $1 | wc -c
}
droppiece() {
for y in `seq 1 5`; do
echo -n $y | cat - $1 | vi board > /dev/null
egrep '={10}' temp > /dev/null || break
[ `counthashes board` -eq `counthashes temp` ] || break
tr # "#" < temp > newboard
done
cp newboard board
}
removelines() {
egrep -v '#{10}' board > temp
SCORE=$(($SCORE + 10 * (`wc -l < board` - `wc -l < temp`)))
yes '[ ]' | head -7 | cat - temp | tail -7 > board
}
SCORE=0
mkpieces
for piece; do
droppiece $piece
removelines
done
cat board
echo $SCORE
Python: 504 519 chars
(Python 3 solution) Currently requires to set the input in the format as shown at the top (input code is not counted). I'll expand to read from file or stdin later. Now works with a prompt, just paste the input in (8 lines total).
R=range
f,p=[input()[1:11]for i in R(7)],p
for(a,b)in input().split():
t=[' '*int(b)+r+' '*9for r in{'I':'#,#,#,#','O':'##,##','Z':'##, ##','T':'###, # ','L':'#,#,##','S':' ##,##','J':' #, #,##'}[a].split(',')]
for r in R(6-len(t),0,-1):
for i in R(len(t)):
if any(a==b=='#'for(a,b)in zip(t[i],f[r+i])):break
else:
for i in R(0,len(t)):
f[r+i]=''.join(a if b!='#'else b for(a,b)in zip(t[i],f[r+i]))
if f[r+i]=='#'*10:del f[r+i];f[0:0]=[' '*10];p+=10
break
print('\n'.join('['+r+']'for r in f[:7]),p,sep='\n')
Not sure if I can save much more there. Quite a lot characters are lost from the transformation to bitfields, but that saves a lot more characters than working with the strings. Also I'm not sure if I can remove more whitespace there, but I'll try it later.
Won't be able to reduce it much more; after having the bitfield-based solution, I transitioned back to strings, as I found a way to compress it more (saved 8 characters over the bitfield!). But given that I forgot to include the L and had an error with the points inside, my character count only goes up sigh... Maybe I find something later to compress it a bit more, but I think I'm near the end. For the original and commented code see below:
Original version:
field = [ input()[1:11] for i in range(7) ] + [ 0, input() ]
# harcoded tetrominoes
tetrominoes = {'I':('#','#','#','#'),'O':('##','##'),'Z':('##',' ##'),'T':('###',' # '),'L':('#','#','##'),'S':(' ##','##'),'J':(' #',' #','##')}
for ( f, c ) in field[8].split():
# shift tetromino to the correct column
tetromino = [ ' ' * int(c) + r + ' ' * 9 for r in tetrominoes[f] ]
# find the correct row to insert
for r in range( 6 - len( tetromino ), 0, -1 ):
for i in range( len( tetromino ) ):
if any( a == b == '#' for (a,b) in zip( tetromino[i], field[r+i] ) ):
# skip the row if some pieces overlap
break
else:
# didn't break, insert the tetromino
for i in range( 0, len( tetromino ) ):
# merge the tetromino with the field
field[r+i] = ''.join( a if b != '#' else b for (a,b) in zip( tetromino[i], field[r+i] ) )
# check for completely filled rows
if field[r+i] == '#' * 10:
# remove current row
del field[r+i]
# add new row
field[0:0] = [' '*10]
field[7] += 10
# we found the row, so abort here
break
# print it in the requested format
print( '\n'.join( '[' + r + ']' for r in field[:7] ) )
# and add the points = 10 * the number of redundant lines at the end
print( str( field[7] ) )
Ruby 1.9, 357 355 353 339 330 310 309 chars
d=0
e=[*$<]
e.pop.split.map{|f|f="L\003\003\007J\005\005\007O\007\007Z\007\013S\013\007I\003\003\003\003T\017\005"[/#{f[j=0]}(\W*)/,1].bytes.map{|z|?\0+?\0*f[1].hex+z.to_s(2).tr("01"," #")[1,9]}
k,f,i=i,[p]+f,e.zip(f).map{|l,m|l.bytes.zip(m.to_s.bytes).map{|n,o|j|=n&3&q=o||0;(n|q).chr}*""}until j>0
e=[]
e+=k.reject{|r|r.sum==544&&e<<r.tr(?#,?\s)&&d+=10}}
puts e,d
Note that the \000 escapes (including the null bytes on the third line) should be replaced with their actual nonprintable equivalent.
Sample input:
[ ]
[ ]
[ ]
[ ]
[ # # #]
[ ## ######]
[==========]
T2 Z6 I0 T7
Usage:
ruby1.9 tetris.rb < input
or
ruby1.9 tetris.rb input
C, 727 [...] 596 581 556 517 496 471 461 457 chars
This is my first code golf, I think character count can get much lower, would be nice if experienced golfers can give me some hints.
The current version can handle playfields with different dimensions, too. The input can have linebreaks in both DOS/Windows and Unix format.
The code was pretty straightforward before optimization, the tetrominoes are stored in 4 integers that are interpreted as an (7*3)x4 bit array, the playfield is stored as-is, tiles are dropped and complete lines are removed at start and after each tile drop.
I wasn't sure how to count characters, so I used the filesize of the code with all unneccessary linebreaks removed.
EDIT 596=>581: Thanks to KitsuneYMG, everything except the %ls suggestion worked perfectly, additionally, I noticed putch instead of putchar can be used (getch somehow doesn't work) and removed all the parentheses in #define G.
EDIT 581=>556: Wasn't satisfied with the remaining for and the nested F loops, so there was some merging, changing and removing of loops, quite confusing but definitely worth it.
EDIT 556=>517: Finally found a way to make a an int array. Some N; merged with c, no break anymore.
EDIT 496=>471: Playfield width and height fixed now.
EDIT 471=>461: Minor modifications, putchar used again as putch is no standard function.
EDIT: Bugfix, complete lines were removed before tile drop instead of after, so complete lines could be left at the end. Fix doesn't change the character count.
#define N (c=getchar())
#define G T[j%4]&1<<t*3+j/4
#define X j%4*w+x+j/4
#define F(x,m) for(x=0;x<m;x++)
#define W while
T[]={916561,992849,217,1},C[99],c,i,j,s,t,x,A,a[99],w=13;
main(){F(j,7)C["IJLSTZO"[j]]=j;
F(j,91)a[j]=N;
W(N>w){t=C[c];x=N-86;
W(c){F(j,12)if(G&&X>1?a[X]-32:0)c=0;
F(j,12)if(G&&X>w&&!c)a[X-w]=35;x+=w;}N;
F(i,6){A=0;t=i*w;F(x,w)A|=(a[t+x]==32);
if(!A){s++;F(j,t)a[t+w-j]=a[t-j];
x=1;W(a[x]-93)a[x++]=32;}}}
F(i,91)putchar(a[i]);printf("%i0",s);}
Python 2.6+ - 334 322 316 characters
397 368 366 characters uncompressed
#coding:l1
exec'xÚEPMO!½ï¯ i,P*Ýlš%ì­‰=‰Ö–*†­þz©‰:‡—Lò¾fÜ”bžAù,MVi™.ÐlǃwÁ„eQL&•uÏÔ‹¿1O6ǘ.€LSLÓ’¼›î”3òšL¸tŠv[ѵl»h;ÁºŽñÝ0Àë»Ç‡ÛûH.ª€¼âBNjr}¹„V5¾3Dë#¼¡•gO. ¾ô6 çÊsÃЮürÃ1&›ßVˆ­ùZ`Ü€ÿžcx±ˆ‹sCàŽ êüRô{U¯ZÕDüE+³ŽFA÷{CjùYö„÷¦¯Î[0þøõ…(Îd®_›â»E#–Y%’›”ëýÒ·X‹d¼.ß9‡kD'.decode('zip')
The single newline is required, and I've counted it as one character.
Browser code-page mumbo jumbo might prevent a successful copy-and-paste of this code, so you can optionally generate the file from this code:
s = """
23 63 6F 64 69 6E 67 3A 6C 31 0A 65 78 65 63 27 78 DA 45 50 4D 4F 03 21
10 BD EF AF 20 69 2C 50 2A 02 DD 6C 9A 25 EC AD 07 8D 89 07 3D 89 1C D6
96 2A 86 05 02 1B AD FE 7A A9 89 3A 87 97 4C F2 BE 66 DC 94 62 9E 41 F9
2C 4D 56 15 69 99 0F 2E D0 6C C7 83 77 C1 16 84 65 51 4C 26 95 75 CF 8D
1C 15 D4 8B BF 31 4F 01 36 C7 98 81 07 2E 80 4C 53 4C 08 D3 92 BC 9B 11
EE 1B 10 94 0B 33 F2 9A 1B 4C B8 74 8A 9D 76 5B D1 B5 6C BB 13 9D 68 3B
C1 BA 8E F1 DD 30 C0 EB BB C7 87 DB FB 1B 48 8F 2E 1C AA 80 19 BC E2 42
4E 6A 72 01 7D B9 84 56 35 BE 33 44 8F 06 EB 40 BC A1 95 67 4F 08 2E 20
BE F4 36 A0 E7 CA 73 C3 D0 AE FC 72 C3 31 26 9B DF 56 88 AD F9 5A 60 DC
80 FF 9E 63 78 B1 88 8B 73 43 E0 8E A0 EA FC 52 F4 7B 55 8D AF 5A 19 D5
44 FC 45 2B B3 8E 46 9D 41 F7 7B 43 6A 12 F9 59 F6 84 F7 A6 01 1F AF CE
5B 30 FE F8 F5 85 28 CE 64 AE 5F 9B E2 BB 45 23 96 59 25 92 9B 94 EB FD
10 D2 B7 58 8B 64 BC 2E DF 39 87 6B 44 27 2E 64 65 63 6F 64 65 28 27 7A
69 70 27 29
"""
with open('golftris.py', 'wb') as f:
f.write(''.join(chr(int(i, 16)) for i in s.split()))
Testing
intetris
[ ]
[ ]
[ ]
[ ]
[ # # #]
[ ## ######]
[==========]
T2 Z6 I0 T7
Newlines must be Unix-style (linefeed only). A trailing newline on the last line is optional.
To test:
> python golftris.py < intetris
[ ]
[ ]
[ ]
[# ###]
[# ### ]
[##### ####]
[==========]
10
This code unzips the original code, and executes it with exec. This decompressed code weighs in at 366 characters and looks like this:
import sys
r=sys.stdin.readlines();s=0;p=r[:1];a='[##########]\n'
for l in r.pop().split():
n=int(l[1])+1;i=0xE826408E26246206601E>>'IOZTLSJ'.find(l[0])*12;m=min(zip(*r[:6]+[a])[n+l].index('#')-len(bin(i>>4*l&31))+3for l in(0,1,2))
for l in range(12):
if i>>l&2:c=n+l/4;o=m+l%4;r[o]=r[o][:c]+'#'+r[o][c+1:]
while a in r:s+=10;r.remove(a);r=p+r
print''.join(r),s
Newlines are required, and are one character each.
Don't try to read this code. The variable names are literally chosen at random in search of the highest compression (with different variable names, I saw as much as 342 characters after compression). A more understandable version follows:
import sys
board = sys.stdin.readlines()
score = 0
blank = board[:1] # notice that I rely on the first line being blank
full = '[##########]\n'
for piece in board.pop().split():
column = int(piece[1]) + 1 # "+ 1" to skip the '[' at the start of the line
# explanation of these three lines after the code
bits = 0xE826408E26246206601E >> 'IOZTLSJ'.find(piece[0]) * 12
drop = min(zip(*board[:6]+[full])[column + x].index('#') -
len(bin(bits >> 4 * x & 31)) + 3 for x in (0, 1, 2))
for i in range(12):
if bits >> i & 2: # if the current cell should be a '#'
x = column + i / 4
y = drop + i % 4
board[y] = board[y][:x] + '#' + board[y][x + 1:]
while full in board: # if there is a full line,
score += 10 # score it,
board.remove(full) # remove it,
board = blank + board # and replace it with a blank line at top
print ''.join(board), score
The crux is in the three cryptic lines I said I'd explain.
The shape of the tetrominoes is encoded in the hexadecimal number there. Each tetronimo is considered to occupy a 3x4 grid of cells, where each cell is either blank (a space) or full (a number sign). Each piece is then encoded with 3 hexadecimal digits, each digit describing one 4-cell column. The least significant digits describe the left-most columns, and the least significant bit in each digit describes the top-most cell in each column. If a bit is 0, then that cell is blank, otherwise it's a '#'. For example, the I tetronimo is encoded as 00F, with the four bits of the least-significant digit set on to encode the four number signs in the left-most column, and the T is 131, with the top bit set on the left and the right, and the top two bits set in the middle.
The entire hexadecimal number is then shift one bit to the left (multiplied by two). This will allow us to ignore the bottom-most bit. I'll explain why in a minute.
So given the current piece from the input, we find the index into this hexadecimal number where the 12 bits describing it's shape begin, then shift that down so that bits 1–12 (skipping bit 0) of the bits variable describe the current piece.
The assignment to drop determines how many rows from the top of the grid the piece will fall before landing on other piece fragments. The first line finds how many empty cells there are at the top of each column of the playing field, while the second finds the lowest occupied cell in each column of the piece. The zip function returns a list of tuples, where each tuple consists of the nth cell from each item in the input list. So, using the sample input board, zip(board[:6] + [full]) will return:
[
('[', '[', '[', '[', '[', '[', '['),
(' ', ' ', ' ', ' ', ' ', ' ', '#'),
(' ', ' ', ' ', ' ', '#', '#', '#'),
(' ', ' ', ' ', ' ', ' ', '#', '#'),
(' ', ' ', ' ', ' ', ' ', ' ', '#'),
(' ', ' ', ' ', ' ', ' ', '#', '#'),
(' ', ' ', ' ', ' ', ' ', '#', '#'),
(' ', ' ', ' ', ' ', '#', '#', '#'),
(' ', ' ', ' ', ' ', ' ', '#', '#'),
(' ', ' ', ' ', ' ', ' ', '#', '#'),
(' ', ' ', ' ', ' ', '#', '#', '#'),
(']', ']', ']', ']', ']', ']', ']')
]
We select the tuple from this list corresponding to the appropriate column, and find the index of the first '#' in the column. This is why we appended a "full" row before calling zip, so that index will have a sensible return (instead of throwing an exception) when the column is otherwise blank.
Then to find the lowest '#' in each column of the piece, we shift and mask the four bits that describe that column, then use the bin function to turn that into a string of ones and zeros. The bin function only returns significant bits, so we need only calculate the length of this string to find the lowest occupied cell (most significant set bit). The bin function also prepends '0b', so we have to subtract that. We also ignore the least significant bit. This is why the hexadecimal number is shift one bit to the left. This is to account for empty columns, whose string representations would have the same length as a column with only the top cell full (such as the T piece).
For example, the columns of the I tetromino, as mentioned earlier, are F, 0, and 0. bin(0xF) is '0b1111'. After ignoring the '0b', we have a length of 4, which is correct. But bin(0x0) is 0b0. After ignoring the '0b', we still have a length of' 1, which is incorrect. To account for this, we've added an additional bit to the end, so that we can ignore this insignificant bit. Hence, the +3 in the code is there to account for the extra length taken up by the '0b' at the beginning, and the insignificant bit at the end.
All of this occurs within a generator expression for three columns ((0,1,2)), and we take the min result to find the maximum number of rows the piece can drop before it touches in any of the three columns.
The rest should be pretty easy to understand by reading the code, but the for loop following these assignments adds the piece to the board. After this, the while loop removes full rows, replacing them with blank rows at the top, and tallies the score. At the end, the board and score are printed to the output.
Python, 298 chars
Beats all non-esoteric language solutions so far (Perl, Ruby, C, bash...)
... and does not even use code-zipping chicanery.
import os
r=os.read
b='[%11c\n'%']'*99+r(0,91)
for k,v in r(0,99).split():
t=map(ord,' -:G!.:; -:; !-.!"-. !". !./')['IJLOSTZ'.find(k)*4:][:4];v=int(v)-31
while'!'>max(b[v+j+13]for j in t):v+=13
for j in t:b=b[:v+j]+'#'+b[v+j+1:]
b=b.replace('[##########]\n','')
print b[-91:],1060-10*len(b)/13
On the test example
[ ]
[ ]
[ ]
[ ]
[ # # #]
[ ## ######]
[==========]
T2 Z6 I0 T7
it outputs
[ ]
[ ]
[ ]
[# ###]
[# ### ]
[##### ####]
[==========]
10
PS. fixed a bug pointed out by Nakilon at cost of +5
Golfscript 260 chars
I'm sure this could be improved, I'm kind of new to Golfscript.
[39 26.2/0:$14{.(}:?~1?15?1?14 2??27?13.!14?2?27?14 1]4/:t;n/)\n*:|;' '/-1%.,:c;~{)18+:&;'XIOZTLSJX'\%~;,1-t\={{.&+.90>{;.}*|\=32=!{&13-:&;}*}%}6*{&+}/|{\.#<'#'+\)|>+}4*{'['\10*']'++}:
;n/0\~n+:|;0\{.'#'
={;)}{n+|+:|;}if\.}do;' '
n+\.#*|+\$+:$;.,1-<:|;}c*|n?$*
End of lines are relevant (there shouldn't be one at the end). Anyway, here are some of the test cases I used:
> cat init.txt
[ ]
[ ]
[ ]
[ ]
[ # # #]
[ ## ######]
[==========]
T2 Z6 I0 T7> cat init.txt | ruby golfscript.rb tetris.gsc
[ ]
[ ]
[ ]
[# ###]
[# ### ]
[##### ####]
[==========]
10
> cat init.txt
[ ]
[ ]
[ ]
[ ]
[ # # #]
[ ## ##### ]
[==========]
I0 O7 Z1 S4> cat init.txt | ruby golfscript.rb tetris.gsc
[ ]
[ ]
[ ]
[# ]
[### #### ]
[### ##### ]
[==========]
10
> cat init.txt
[ ]
[ ]
[ ]
[ ## ### ]
[ # # ]
[ ## ######]
[==========]
T7 I0 I3> cat init.txt | ruby golfscript.rb tetris.gsc
[ ]
[ ]
[ ]
[ ]
[# # ]
[## # # # ]
[==========]
20
Note that there is no end of line in the input file, an end of line would break the script as is.
O'Caml 809 782 Chars
open String let w=length let c s=let x=ref 0in iter(fun k->if k='#'then incr x)s;!x open List let(#),g,s,p,q=nth,ref[],ref 0,(0,1),(0,2)let l=length let u=Printf.printf let rec o x i j=let a=map(fun s->copy s)!g in if snd(fold_left(fun(r,k)(p,l)->let z=c(a#r)in blit(make l '#')0(a#r)(i+p)l;if c(a#r)=z+l then r+1,k else r,false)(j-l x+1,true)x)then g:=a else o x i(j-1)and f x=let s=read_line()in if s.[1]='='then g:=rev x else f(sub s 1 10::x)let z=f [];read_line();;for i=0to w z/3 do o(assoc z.[i*3]['I',[p;p;p;p];'O',[q;q];'Z',[q;1,2];'T',[0,3;1,1];'L',[p;p;q];'S',[1,2;q];'J',[1,1;1,1;q]])(Char.code z.[i*3+1]-48)(l!g-1);let h=l!g in g:=filter(fun s->c s<>w s)!g;for i=1to h-(l!g)do incr s;g:=make 10' '::!g done;done;iter(fun r->u"[%s]\n"r)!g;u"[==========]\n";u"%d\n"(!s*10)
Common Lisp 667 657 645 Chars
My first attempt at code golf, so there are probably many tricks that I don't know yet. I left some newlines there to keep some residual "readability" (I counted newlines as 2 bytes, so removing 6 unnecessary newlines gains 12 more characters).
In input, first put the shapes then the field.
(let(b(s 0)m(e'(0 1 2 3 4 5 6 7 8 9)))
(labels((o(p i)(mapcar(lambda(j)(+ i j))p))(w(p r)(o p(* 13 r)))(f(i)(find i b))
(a(&aux(i(position(read-char)"IOZTLSJ")))(when i(push(o(nth i'((0 13 26 39)(0 1 13 14)(0 1 14 15)(0 1 2 14)(0 13 26 27)(1 2 13 14)(1 14 26 27)))(read))m)(a))))
(a)(dotimes(i 90)(if(find(read-char)"#=")(push i b)))(dolist(p(reverse m))
(setf b`(,#b,#(w p(1-(position-if(lambda(i)(some #'f(w p i)))e)))))
(dotimes(i 6)(when(every #'f(w e i))(setf s(1+ s)b(mapcar(lambda(k)(+(if(>(* 13 i)k)13(if(<=(* 13(1+ i))k)0 78))k))b)))))
(dotimes(i 6)(format t"[~{~:[ ~;#~]~}]
"(mapcar #'f(w e i))))(format t"[==========]
~a0"s)))
Testing
T2 Z6 I0 T7
[ ]
[ ]
[ ]
[ ]
[ # # #]
[ ## ######]
[==========]
[ ]
[ ]
[ ]
[# ###]
[# ### ]
[##### ####]
[==========]
10
NIL
Ruby 505 479 474 442 439 426 chars
A first attempt. Have done it with IronRuby. I'm sure it can be improved, but I really should get some work done today!
p,q,r,s=(0..9),(0..2),(0..6),0
t=[*$<]
f=p.map{|a|g=0;r.map{|b|g+=2**b if t[6-b][a+1]==?#};g}
t.pop.split.map{|x|w,y=[15,51,306,562,23,561,113]["IOZTLSJ"=~/#{x[0]}/],x[1].to_i
l=q.map{|d|r.inject{|b,c|f[d+y]&(w>>(d*4)&15-c+1)>0?c:b}}.max
q.map{|b|f[b+y]|=w>>(b*4)&15-l}
r.map{i=f.inject{|a,b|a&b};f.map!{|a|b=i^(i-1);a=((a&~b)>>1)+(a&(b>>1))};s+=i>0?10:0}}
p.map{|a|r.map{|b|t[6-b][a+1]=f[a]&2**b>0??#:' '}}
puts t,s
Testing
cat test.txt | ruby tetris.rb
[ ]
[ ]
[ ]
[ ]
[# ###]
[# ### ]
[##### ####]
[==========]
10
Edit
Now using normal ruby. Got the walls output..
Another one in Ruby, 573 546 characters:**
Z={I:?#*4,J:'#,###',L:'###,#',O:'##,##',S:'#,##, #',Z:' #,##,#',T:' #,##, #'}
t=[*$<]
R=->s{s.reverse}
T=->m{m.transpose}
a = T[R[t].join.scan /.#{'(\D)'*10}.$/]
t.pop.split.each{|z|
t,o=Z[z[0].to_sym].split(',').map{|x|x.split //},z[1].to_i
r=0..t.size-1
y=r.map{|u|1+a[o+u].rindex(?#).to_i-t[u].count(' ')}.max
(0..3).each{|i|r.each{|j|t[j][i]==?#&&a[o+j][y+i]=t[j][i]}}}
s=0
a.each{|x|s=a.max_by(&:size).size;x[s-=1]||=' 'while s>0}
a=R[T[a].reject{|x|x*''=~/[#]{10}/&&s+=10}.map{|x|?[+x*''+?]}[0..6]]
puts (0..8-a.size).map{?[+' '*10+?]},a,s
Testing:
cat test.txt | ruby 3858384_tetris.rb
[ ]
[ ]
[ ]
[ ]
[# ###]
[# ### ]
[##### ####]
[==========]
10