78
78
#error "no wait set implementation available"
79
79
#endif
80
80
81
+ /*
82
+ * Connection pooler and mtm need to delete events from event set.
83
+ * Since we have to preserve the positions of all other events,
84
+ * we cannot move events. So we have to maintain a list of free events.
85
+ * But poll/WaitForMultipleObjects operate on an array of listened events.
86
+ * That is why elements of the pollfds and handles arrays must be stored without holes
87
+ * and we need to maintain mapping between them and WaitEventSet events.
88
+ * This mapping is stored in the "permutation" array. We also need the backward mapping
89
+ * (from event to descriptors array) which is implemented using "index" field of WaitEvent.
90
+ */
91
+
81
92
/* typedef in latch.h */
82
93
struct WaitEventSet
83
94
{
84
95
int nevents ; /* number of registered events */
85
96
int nevents_space ; /* maximum number of events in this set */
86
97
98
+ /*
99
+ * Head of the singly-linked list of free events, linked through "pos" and terminated by -1.
100
+ */
101
+ int free_events ;
102
+
87
103
/*
88
104
* Array, of nevents_space length, storing the definition of events this
89
105
* set is waiting for.
90
106
*/
91
107
WaitEvent * events ;
92
108
109
+ int * permutation ; /* indexes of used events (see comment above) */
110
+
93
111
/*
94
112
* If WL_LATCH_SET is specified in any wait event, latch is a pointer to
95
113
* said latch, and latch_pos the offset in the ->events array. This is
@@ -150,9 +168,9 @@ static void WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action
150
168
#elif defined(WAIT_USE_KQUEUE )
151
169
static void WaitEventAdjustKqueue (WaitEventSet * set , WaitEvent * event , int old_events );
152
170
#elif defined(WAIT_USE_POLL )
153
- static void WaitEventAdjustPoll (WaitEventSet * set , WaitEvent * event );
171
+ static void WaitEventAdjustPoll (WaitEventSet * set , WaitEvent * event , bool remove );
154
172
#elif defined(WAIT_USE_WIN32 )
155
- static void WaitEventAdjustWin32 (WaitEventSet * set , WaitEvent * event );
173
+ static void WaitEventAdjustWin32 (WaitEventSet * set , WaitEvent * event , bool remove );
156
174
#endif
157
175
158
176
static inline int WaitEventSetWaitBlock (WaitEventSet * set , int cur_timeout ,
@@ -574,6 +592,7 @@ CreateWaitEventSet(MemoryContext context, int nevents)
574
592
*/
575
593
sz += MAXALIGN (sizeof (WaitEventSet ));
576
594
sz += MAXALIGN (sizeof (WaitEvent ) * nevents );
595
+ sz += MAXALIGN (sizeof (int ) * nevents );
577
596
578
597
#if defined(WAIT_USE_EPOLL )
579
598
sz += MAXALIGN (sizeof (struct epoll_event ) * nevents );
@@ -594,23 +613,23 @@ CreateWaitEventSet(MemoryContext context, int nevents)
594
613
set -> events = (WaitEvent * ) data ;
595
614
data += MAXALIGN (sizeof (WaitEvent ) * nevents );
596
615
616
+ set -> permutation = (int * ) data ;
617
+ data += MAXALIGN (sizeof (int ) * nevents );
618
+
597
619
#if defined(WAIT_USE_EPOLL )
598
620
set -> epoll_ret_events = (struct epoll_event * ) data ;
599
- data += MAXALIGN (sizeof (struct epoll_event ) * nevents );
600
621
#elif defined(WAIT_USE_KQUEUE )
601
622
set -> kqueue_ret_events = (struct kevent * ) data ;
602
- data += MAXALIGN (sizeof (struct kevent ) * nevents );
603
623
#elif defined(WAIT_USE_POLL )
604
624
set -> pollfds = (struct pollfd * ) data ;
605
- data += MAXALIGN (sizeof (struct pollfd ) * nevents );
606
625
#elif defined(WAIT_USE_WIN32 )
607
- set -> handles = (HANDLE ) data ;
608
- data += MAXALIGN (sizeof (HANDLE ) * nevents );
626
+ set -> handles = (HANDLE * ) data ;
609
627
#endif
610
628
611
629
set -> latch = NULL ;
612
630
set -> nevents_space = nevents ;
613
631
set -> exit_on_postmaster_death = false;
632
+ set -> free_events = -1 ;
614
633
615
634
#if defined(WAIT_USE_EPOLL )
616
635
if (!AcquireExternalFD ())
@@ -702,12 +721,11 @@ FreeWaitEventSet(WaitEventSet *set)
702
721
close (set -> kqueue_fd );
703
722
ReleaseExternalFD ();
704
723
#elif defined(WAIT_USE_WIN32 )
705
- WaitEvent * cur_event ;
724
+ int i ;
706
725
707
- for (cur_event = set -> events ;
708
- cur_event < (set -> events + set -> nevents );
709
- cur_event ++ )
726
+ for (i = 0 ; i < set -> nevents ; i ++ )
710
727
{
728
+ WaitEvent * cur_event = & set -> events [set -> permutation [i ]];
711
729
if (cur_event -> events & WL_LATCH_SET )
712
730
{
713
731
/* uses the latch's HANDLE */
@@ -720,7 +738,7 @@ FreeWaitEventSet(WaitEventSet *set)
720
738
{
721
739
/* Clean up the event object we created for the socket */
722
740
WSAEventSelect (cur_event -> fd , NULL , 0 );
723
- WSACloseEvent (set -> handles [cur_event -> pos + 1 ]);
741
+ WSACloseEvent (set -> handles [cur_event -> index + 1 ]);
724
742
}
725
743
}
726
744
#endif
@@ -761,6 +779,7 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
761
779
void * user_data )
762
780
{
763
781
WaitEvent * event ;
782
+ int free_event ;
764
783
765
784
/* not enough space */
766
785
Assert (set -> nevents < set -> nevents_space );
@@ -790,8 +809,20 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
790
809
if (fd == PGINVALID_SOCKET && (events & WL_SOCKET_MASK ))
791
810
elog (ERROR , "cannot wait on socket event without a socket" );
792
811
793
- event = & set -> events [set -> nevents ];
794
- event -> pos = set -> nevents ++ ;
812
+ free_event = set -> free_events ;
813
+ if (free_event >= 0 )
814
+ {
815
+ event = & set -> events [free_event ];
816
+ set -> free_events = event -> pos ;
817
+ event -> pos = free_event ;
818
+ }
819
+ else
820
+ {
821
+ event = & set -> events [set -> nevents ];
822
+ event -> pos = set -> nevents ;
823
+ }
824
+ set -> permutation [set -> nevents ] = event -> pos ;
825
+ event -> index = set -> nevents ++ ;
795
826
event -> fd = fd ;
796
827
event -> events = events ;
797
828
event -> user_data = user_data ;
@@ -820,14 +851,54 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
820
851
#elif defined(WAIT_USE_KQUEUE )
821
852
WaitEventAdjustKqueue (set , event , 0 );
822
853
#elif defined(WAIT_USE_POLL )
823
- WaitEventAdjustPoll (set , event );
854
+ WaitEventAdjustPoll (set , event , false );
824
855
#elif defined(WAIT_USE_WIN32 )
825
- WaitEventAdjustWin32 (set , event );
856
+ WaitEventAdjustWin32 (set , event , false );
826
857
#endif
827
858
828
859
return event -> pos ;
829
860
}
830
861
862
+ /*
863
+ * Remove event with specified position in event set.
864
+ *
865
+ * 'pos' is the id returned by AddWaitEventToSet.
866
+ */
867
+ void
868
+ DeleteWaitEvent (WaitEventSet * set , int pos )
869
+ {
870
+ WaitEvent * event ;
871
+ #if defined(WAIT_USE_KQUEUE )
872
+ int old_events ;
873
+ #endif
874
+
875
+ Assert (pos < set -> nevents_space );
876
+ event = & set -> events [pos ];
877
+
878
+ #if defined(WAIT_USE_EPOLL )
879
+ WaitEventAdjustEpoll (set , event , EPOLL_CTL_DEL );
880
+ #elif defined(WAIT_USE_KQUEUE )
881
+ old_events = event -> events ;
882
+ event -> events = 0 ;
883
+ WaitEventAdjustKqueue (set , event , old_events );
884
+ #elif defined(WAIT_USE_POLL )
885
+ WaitEventAdjustPoll (set , event , true);
886
+ #elif defined(WAIT_USE_WIN32 )
887
+ WaitEventAdjustWin32 (set , event , true);
888
+ #endif
889
+ if (-- set -> nevents != 0 )
890
+ {
891
+ set -> permutation [event -> index ] = set -> permutation [set -> nevents ];
892
+ set -> events [set -> permutation [set -> nevents ]].index = event -> index ;
893
+ }
894
+ event -> fd = PGINVALID_SOCKET ;
895
+ event -> events = 0 ;
896
+ event -> index = -1 ;
897
+ event -> pos = set -> free_events ;
898
+ set -> free_events = pos ;
899
+ }
900
+
901
+
831
902
/*
832
903
* Change the event mask and, in the WL_LATCH_SET case, the latch associated
833
904
* with the WaitEvent.
@@ -842,7 +913,7 @@ ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
842
913
int old_events ;
843
914
#endif
844
915
845
- Assert (pos < set -> nevents );
916
+ Assert (pos < set -> nevents_space );
846
917
847
918
event = & set -> events [pos ];
848
919
#if defined(WAIT_USE_KQUEUE )
@@ -884,9 +955,9 @@ ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
884
955
#elif defined(WAIT_USE_KQUEUE )
885
956
WaitEventAdjustKqueue (set , event , old_events );
886
957
#elif defined(WAIT_USE_POLL )
887
- WaitEventAdjustPoll (set , event );
958
+ WaitEventAdjustPoll (set , event , false );
888
959
#elif defined(WAIT_USE_WIN32 )
889
- WaitEventAdjustWin32 (set , event );
960
+ WaitEventAdjustWin32 (set , event , false );
890
961
#endif
891
962
}
892
963
@@ -933,7 +1004,20 @@ WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action)
933
1004
*/
934
1005
rc = epoll_ctl (set -> epoll_fd , action , event -> fd , & epoll_ev );
935
1006
936
- if (rc < 0 )
1007
+ /*
1008
+ * Skip throwing error in case of EPOLL_CTL_DEL. Upon connection error
1009
+ * libpq may or may not close the socket, so epfd can disappear.
1010
+ *
1011
+ * XXX it is not entirely clear which errnos should be checked
1012
+ * here. According to the man pages I would say it is 'EBADF' (a closed socket is
1013
+ * not valid, right?), and a simple test on my 5.1.11 Debian agrees with
1014
+ * that. However, msvs-6-3 bf machine with 2.6.32 spits out ENOENT (under
1015
+ * dmq) despite evidently correct usage (we don't DEL the same fd
1016
+ * twice). EINVAL was also historically checked here.
1017
+ */
1018
+ if (rc < 0 &&
1019
+ !(action == EPOLL_CTL_DEL &&
1020
+ (errno == EBADF || errno == EINVAL || errno == ENOENT )))
937
1021
ereport (ERROR ,
938
1022
(errcode_for_socket_access (),
939
1023
/* translator: %s is a syscall name, such as "poll()" */
@@ -944,11 +1028,16 @@ WaitEventAdjustEpoll(WaitEventSet *set, WaitEvent *event, int action)
944
1028
945
1029
#if defined(WAIT_USE_POLL )
946
1030
static void
947
- WaitEventAdjustPoll (WaitEventSet * set , WaitEvent * event )
1031
+ WaitEventAdjustPoll (WaitEventSet * set , WaitEvent * event , bool remove )
948
1032
{
949
- struct pollfd * pollfd = & set -> pollfds [event -> pos ];
1033
+ struct pollfd * pollfd = & set -> pollfds [event -> index ];
1034
+
1035
+ if (remove )
1036
+ {
1037
+ * pollfd = set -> pollfds [set -> nevents - 1 ]; /* nevents is not decremented yet */
1038
+ return ;
1039
+ }
950
1040
951
- pollfd -> revents = 0 ;
952
1041
pollfd -> fd = event -> fd ;
953
1042
954
1043
/* prepare pollfd entry once */
@@ -1088,7 +1177,11 @@ WaitEventAdjustKqueue(WaitEventSet *set, WaitEvent *event, int old_events)
1088
1177
if (event -> events == WL_POSTMASTER_DEATH &&
1089
1178
(errno == ESRCH || errno == EACCES ))
1090
1179
set -> report_postmaster_not_running = true;
1091
- else
1180
+ /*
1181
+ * Like in WaitEventAdjustEpoll, don't throw if we are trying to
1182
+ * remove already closed socket. FIXME: ensure this check is right.
1183
+ */
1184
+ else if (!(event -> events == 0 && errno == EBADF ))
1092
1185
ereport (ERROR ,
1093
1186
(errcode_for_socket_access (),
1094
1187
/* translator: %s is a syscall name, such as "poll()" */
@@ -1112,9 +1205,21 @@ WaitEventAdjustKqueue(WaitEventSet *set, WaitEvent *event, int old_events)
1112
1205
1113
1206
#if defined(WAIT_USE_WIN32 )
1114
1207
static void
1115
- WaitEventAdjustWin32 (WaitEventSet * set , WaitEvent * event )
1208
+ WaitEventAdjustWin32 (WaitEventSet * set , WaitEvent * event , bool remove )
1116
1209
{
1117
- HANDLE * handle = & set -> handles [event -> pos + 1 ];
1210
+ HANDLE * handle = & set -> handles [event -> index + 1 ];
1211
+
1212
+ if (remove )
1213
+ {
1214
+ Assert (event -> fd != PGINVALID_SOCKET );
1215
+
1216
+ if (* handle != WSA_INVALID_EVENT )
1217
+ WSACloseEvent (* handle );
1218
+
1219
+ * handle = set -> handles [set -> nevents ]; /* nevents is not decremented yet but we need to add 1 to the index */
1220
+ set -> handles [set -> nevents ] = WSA_INVALID_EVENT ;
1221
+ return ;
1222
+ }
1118
1223
1119
1224
if (event -> events == WL_LATCH_SET )
1120
1225
{
@@ -1562,11 +1667,12 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1562
1667
{
1563
1668
int returned_events = 0 ;
1564
1669
int rc ;
1565
- WaitEvent * cur_event ;
1566
- struct pollfd * cur_pollfd ;
1670
+ int i ;
1671
+ struct pollfd * cur_pollfd = set -> pollfds ;
1672
+ WaitEvent * cur_event ;
1567
1673
1568
1674
/* Sleep */
1569
- rc = poll (set -> pollfds , set -> nevents , (int ) cur_timeout );
1675
+ rc = poll (cur_pollfd , set -> nevents , (int ) cur_timeout );
1570
1676
1571
1677
/* Check return code */
1572
1678
if (rc < 0 )
@@ -1589,15 +1695,13 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1589
1695
return -1 ;
1590
1696
}
1591
1697
1592
- for (cur_event = set -> events , cur_pollfd = set -> pollfds ;
1593
- cur_event < (set -> events + set -> nevents ) &&
1594
- returned_events < nevents ;
1595
- cur_event ++ , cur_pollfd ++ )
1698
+ for (i = 0 ; i < set -> nevents && returned_events < nevents ; i ++ , cur_pollfd ++ )
1596
1699
{
1597
1700
/* no activity on this FD, skip */
1598
1701
if (cur_pollfd -> revents == 0 )
1599
1702
continue ;
1600
1703
1704
+ cur_event = & set -> events [set -> permutation [i ]];
1601
1705
occurred_events -> pos = cur_event -> pos ;
1602
1706
occurred_events -> user_data = cur_event -> user_data ;
1603
1707
occurred_events -> events = 0 ;
@@ -1688,17 +1792,25 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1688
1792
WaitEvent * occurred_events , int nevents )
1689
1793
{
1690
1794
int returned_events = 0 ;
1795
+ int i ;
1691
1796
DWORD rc ;
1692
- WaitEvent * cur_event ;
1797
+ WaitEvent * cur_event ;
1693
1798
1694
1799
/* Reset any wait events that need it */
1695
- for (cur_event = set -> events ;
1696
- cur_event < (set -> events + set -> nevents );
1697
- cur_event ++ )
1698
- {
1699
- if (cur_event -> reset )
1700
- {
1701
- WaitEventAdjustWin32 (set , cur_event );
1800
+ for (i = 0 ; i < set -> nevents ; i ++ )
1801
+ {
1802
+ cur_event = & set -> events [set -> permutation [i ]];
1803
+
1804
+ /*
1805
+ * I have a problem on Windows where SSPI connections "hang" in WaitForMultipleObjects, which
1806
+ * doesn't signal presence of input data (while it is possible to read this data from the socket).
1807
+ * It looks like the "reset" logic is not completely correct (resetting the event just after
1808
+ * receiving the previous read event). Resetting all read events fixes this problem.
1809
+ */
1810
+ if (cur_event -> events & WL_SOCKET_READABLE )
1811
+ /* if (cur_event->reset) */
1812
+ {
1813
+ WaitEventAdjustWin32 (set , cur_event , false);
1702
1814
cur_event -> reset = false;
1703
1815
}
1704
1816
@@ -1764,7 +1876,7 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1764
1876
* With an offset of one, due to the always present pgwin32_signal_event,
1765
1877
* the handle offset directly corresponds to a wait event.
1766
1878
*/
1767
- cur_event = (WaitEvent * ) & set -> events [rc - WAIT_OBJECT_0 - 1 ];
1879
+ cur_event = (WaitEvent * ) & set -> events [set -> permutation [ rc - WAIT_OBJECT_0 - 1 ] ];
1768
1880
1769
1881
occurred_events -> pos = cur_event -> pos ;
1770
1882
occurred_events -> user_data = cur_event -> user_data ;
@@ -1805,7 +1917,7 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
1805
1917
else if (cur_event -> events & WL_SOCKET_MASK )
1806
1918
{
1807
1919
WSANETWORKEVENTS resEvents ;
1808
- HANDLE handle = set -> handles [cur_event -> pos + 1 ];
1920
+ HANDLE handle = set -> handles [cur_event -> index + 1 ];
1809
1921
1810
1922
Assert (cur_event -> fd );
1811
1923
0 commit comments